Exemplo n.º 1
0
/*
 * Return the next character of the stream without consuming it:
 * the character is fetched with lexgetc() and immediately handed
 * back with lexungetc().
 */
int
lexpeekc(LexStream *L)
{
    int next;

    next = lexgetc(L);
    lexungetc(L, next);
    return next;
}
Exemplo n.º 2
0
/*
 * Lex `str` and check that it produces exactly one T_FLOATNUM token,
 * followed by T_EOF, whose AST_FLOAT node holds the same 32 bit
 * pattern as `fval`.
 */
static void
testFloat(const char *str, float fval)
{
    union {
        float f;
        uint32_t i;
    } bits;
    LexStream L;
    AST *ast;
    Token t;
    uint32_t val;
    int c;

    /* view the expected float as its raw 32 bit representation */
    bits.f = fval;
    val = bits.i;

    printf("testing number[%s]...", str);
    strToLex(&L, str, NULL);

    /* one float token, then nothing else */
    t = getToken(&L, &ast);
    EXPECTEQ(t, T_FLOATNUM);
    c = lexgetc(&L);
    EXPECTEQ(c, T_EOF);

    /* the node must exist and carry the expected bits */
    assert(ast != NULL);
    assert(ast->kind == AST_FLOAT);
    EXPECTEQ(ast->d.ival, val);
    printf("passed\n");
}
Exemplo n.º 3
0
/*
 * Handle an include statement: read the file name that follows, open the
 * file and make it the current lexer input.
 *
 * Two forms are accepted, each terminated by ';':
 *   "name"  - the text between the quotes is used as the path unchanged;
 *   <name>  - the name (letters, digits and '.', starting with a letter)
 *             is appended to incdir with a '/' separator.
 *
 * On success the opened file is recorded in the next in_files[] slot and
 * its first non-empty line is loaded into linebuf.  Every error is
 * reported through my_yyerror() and followed by my_exit(1).
 *
 * Uses s, t, buffer, incdir, lexin, in_index, in_files, linebuf,
 * curr_index and curr_length, all declared outside this function.
 * NOTE(review): nothing here bounds the writes to buffer, linebuf or
 * in_files[]; their sizes are not visible from this function.
 */
inclusion()
{
	int i, c;

	/* skip blanks in front of the opening delimiter */
	while (iswhite(c = lexgetc()))
		;
	if (c != '<' && c != '"') {
		my_yyerror("< or \" expected after an include statement\n",
			    "cannot continue compilation");
		/* fatal like every other error below: do not parse on */
		my_exit(1);
	}

	if (c == '"') {
		/* quoted form: copy everything up to the closing quote */
		for (s = (char *)buffer; (c = lexgetc()) != '"'; *s++ = c) {
			if (c == EOF) {
				my_yyerror("reached EOF and have not found \" to close string of include\n", "cannot continue compilation");
				my_exit(1);
			}
		}
		*s = '\0';
		while (iswhite(c = lexgetc()))
			;
		if (c != ';') {
			my_yyerror("; expected after string",
				 "cannot continue compilation");
			my_exit(1);
		}
	} else {
		/* angle form: the path starts with "<incdir>/" */
		s = (char *)buffer;
		t = incdir;
		while ((*s++ = *t++) != '\0')
			;
		s--;
		*s++ = '/';
		while (iswhite(c = lexgetc()))
			;
		if (isalpha(c)) {
			while (isalpha(c) || isdigit(c) || c == '.') {
				*s = c;
				c = lexgetc();
				s++;
				if (c == EOF) {
					my_yyerror("reached EOF and have not found >","cannot continue compilation");
					my_exit(1);
				}
			}
			*s = '\0';
			while (iswhite(c))
				c = lexgetc();
			if (c != '>') {
				my_yyerror("> needed to close include statement","cannot continue compilation");
				my_exit(1);
			}
			while (iswhite(c = lexgetc()))
				;
			if (c != ';') {
				my_yyerror("; expected after include <filename>",
					 "cannot continue compilation");
				my_exit(1);
			}
		} else {
			my_yyerror("after < a file name is required",
				"cannot continue compilation");
			my_exit(1);
		}
	}

	if ((lexin = fopen(buffer, "r")) == NULL) {
		my_yyerror( "cannot open include file"," ");
		fprintf(stderr,"'%s` ",buffer);
		my_exit(1);
	}

	/* record the new input file in the next in_files[] slot */
	in_index++;
	in_files[in_index].in_fdes = lexin;
	in_files[in_index].in_name = strsave(buffer);
	in_files[in_index].in_line = 1;

	/*
	 * Skip leading empty lines, then load the first line into linebuf.
	 * (The original assigned through a cast, "(int)c = getc(...)",
	 * which is not valid ISO C; c is already an int.)
	 */
	i = 0;
	while ((c = getc(lexin)) == '\n')
		;
	while (c != '\n' && c != EOF) {
		linebuf[i] = c;
		c = getc(lexin);
		i++;
	}
	linebuf[i] = c;
	curr_index = 0;
	curr_length = i;
}
Exemplo n.º 4
0
/*
 * Parse a string literal.  Characters are collected until a closing
 * double quote, or until a character outside the range 1..255 is read.
 * The text is stored in a new AST_STRING node returned through ast_ptr.
 * Always returns T_STRING.
 */
static int
parseString(LexStream *L, AST **ast_ptr)
{
    struct flexbuf text;
    AST *node;
    int ch;

    flexbuf_init(&text, INCSTR);
    for (ch = lexgetc(L); ch != '"' && ch > 0 && ch < 256; ch = lexgetc(L)) {
        flexbuf_addchar(&text, ch);
    }
    /* terminate the collected text */
    flexbuf_addchar(&text, '\0');

    node = NewAST(AST_STRING, NULL, NULL);
    node->d.string = flexbuf_get(&text);
    *ast_ptr = node;
    return T_STRING;
}
Exemplo n.º 5
0
/*
 * Scan a numeric constant for the lexical analyzer.
 * `c' is the first character of the number.
 * The text is collected in linebuf.  When that text is a lone '.',
 * DOT is returned.  Otherwise the character that ended the scan is
 * pushed back, yylval.node is set to an integer node (no '.' or
 * exponent seen and wcstol() did not report ERANGE) or to a real
 * node, and CONSTANT is returned.
 */
static int
lexnumber(wint_t c)
{
	wchar_t *out = linebuf;
	int ndots = 0;
	int nexps = 0;
	INT number;

	for (;;) {
		if (c == '.') {
			/* a second '.' ends the number */
			if (ndots++)
				break;
		} else if (c == 'e' || c == 'E') {
			/*
			 * Exponent marker: a following sign is kept, with
			 * an 'e' stored in front of it; any other character
			 * is pushed back and the marker is stored as 'e'.
			 */
			c = lexgetc();
			if (c == '-' || c == '+') {
				*out++ = 'e';
			} else {
				lexungetc(c);
				c = 'e';
			}
			/* a second exponent ends the number */
			if (nexps++)
				break;
		} else if (!iswdigit(c)) {
			break;
		}
		*out++ = c;
		c = lexgetc();
		if (c == WEOF)
			break;
	}
	*out = '\0';
	if (ndots && out == linebuf + 1)
		return (DOT);
	lexungetc(c);

	errno = 0;
	if (!ndots && !nexps) {
		number = wcstol(linebuf, (wchar_t **)0, 10);
		if (errno != ERANGE) {
			yylval.node = intnode(number);
			return (CONSTANT);
		}
	}
	/* has a fraction or exponent, or does not fit an integer */
	yylval.node = realnode((REAL)wcstod(linebuf, (wchar_t **)0));
	return (CONSTANT);
}
Exemplo n.º 6
0
/*
 * Lexical analyzer: skip white space and return the next token.
 *   - a word (first character a letter or '@'): the keyword's token
 *     when the word is a keyword and cconst is set (cconst is then
 *     cleared), otherwise WORD;
 *   - a number (first character a digit or '~'): NUMB;
 *   - a string introduced by '`': STRING_QUOTED;
 *   - anything else: the character itself; '[' also sets cconst.
 * The token's value, where there is one, is left in yylval.
 */
yylex()
{
	int k;
	STRING strsave();
	STRING getstring(), getword();
	float getnum();

	/* skip white space */
	while (iswhite(c = lexgetc()))
		;

	if (c != EOF && (isalpha(c) || c == '@')) {
		s = getword(c);
		k = keyfind(s);
		if (k != NKEYWORDS && cconst) {
			yylval.strg = keywords[k].keyname;
			cconst = false;
			return (keywords[k].keyret);
		}
		yylval.strg = s;
		return (WORD);
	}

	if (isdigit(c) || c == '~') {
		yylval.numb = (float)getnum(c);
		return (NUMB);
	}

	if (c == '`') {
		c = lexgetc();
		yylval.strg = getstring(c);
		return (STRING_QUOTED);
	}

	if (c == '[')
		cconst = true;
	return (c);
}
Exemplo n.º 7
0
/*
 * Lex `str` and check that it produces exactly one T_NUM token,
 * followed by T_EOF, whose AST_INTEGER node holds `val`.
 */
static void
testNumber(const char *str, uint32_t val)
{
    LexStream L;
    AST *ast;
    Token t;
    int c;

    printf("testing number[%s]...", str);
    strToLex(&L, str, NULL);

    /* one integer token, then nothing else */
    t = getToken(&L, &ast);
    EXPECTEQ(t, T_NUM);
    c = lexgetc(&L);
    EXPECTEQ(c, T_EOF);

    /* the node must exist and carry the expected value */
    assert(ast != NULL);
    assert(ast->kind == AST_INTEGER);
    EXPECTEQ(ast->d.ival, val);
    printf("passed\n");
}
Exemplo n.º 8
0
/*
 * Read a string or regular expression, terminated by ``endc'',
 * for lexical analyzer, processing escape sequences.
 * The processed text is stored, NUL terminated, in linebuf.
 *
 * regx is non-zero for a regular expression: it selects the wording
 * of the newline/EOF error messages and makes "\\" keep both
 * backslashes.
 * cmd_line_operand is non-zero to keep the backslash of an escaped
 * terminator; the terminator is then stored and also pushed back, so
 * the next read ends the scan.
 *
 * Return string length.
 */
static size_t
lexescape(wint_t endc, int regx, int cmd_line_operand)
{
	static char nlre[256];
	static char nlstr[256];
	static char eofre[256];
	static char eofstr[256];
	/*
	 * Must be static: as an automatic variable it was reset to 1 on
	 * every call, so the messages below were copied again each time.
	 */
	static int first_time = 1;
	wint_t c;
	wchar_t *cp;
	int n, max;

	if (first_time == 1) {
		(void) strcpy(nlre, gettext("Newline in regular expression\n"));
		(void) strcpy(nlstr, gettext("Newline in string\n"));
		(void) strcpy(eofre, gettext("EOF in regular expression\n"));
		(void) strcpy(eofstr, gettext("EOF in string\n"));
		first_time = 0;
	}

	cp = linebuf;
	while ((c = lexgetc()) != endc) {
		if (c == '\n')
			awkerr(regx ? nlre : nlstr);
		if (c == '\\') {
			switch (c = lexgetc(), c) {
			case '\\':
				if (regx)
					*cp++ = '\\';
				break;

			case '/':
				c = '/';
				break;

			case 'n':
				c = '\n';
				break;

			case 'b':
				c = '\b';
				break;

			case 't':
				c = '\t';
				break;

			case 'r':
				c = '\r';
				break;

			case 'f':
				c = '\f';
				break;

			case 'v':
				c = '\v';
				break;

			case 'a':
				c = (char)0x07;
				break;

			case 'x':
				/* \x followed by any number of hex digits */
				n = 0;
				while (iswxdigit(c = lexgetc())) {
					if (iswdigit(c))
						c -= '0';
					else if (iswupper(c))
						c -= 'A'-10;
					else
						c -= 'a'-10;
					n = (n<<4) + c;
				}
				lexungetc(c);
				c = n;
				break;

			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
#if 0
/*
 * Posix.2 draft 10 disallows the use of back-referencing - it explicitly
 * requires processing of the octal escapes both in strings and
 * regular expressions. The following code is disabled instead of
 * removed as back-referencing may be reintroduced in a future draft
 * of the standard.
 */
				/*
				 * For regular expressions, we disallow
				 * \ooo to mean octal character, in favour
				 * of back referencing.
				 */
				if (regx) {
					*cp++ = '\\';
					break;
				}
#endif
				/* up to three octal digits */
				max = 3;
				n = 0;
				do {
					n = (n<<3) + c-'0';
					if ((c = lexgetc()) > '7' || c < '0')
						break;
				} while (--max);
				lexungetc(c);
				/*
				 * an octal escape sequence must have at least
				 * 2 digits after the backslash, otherwise
				 * it gets passed straight thru for possible
				 * use in backreferencing.
				 */
				if (max == 3) {
					*cp++ = '\\';
					n += '0';
				}
				c = n;
				break;

			case '\n':
				/* backslash-newline: drop both characters */
				continue;

			default:
				/*
				 * Unknown escape: keep the backslash, except
				 * in front of the terminator when this is not
				 * a command line operand.
				 */
				if (c != endc || cmd_line_operand) {
					*cp++ = '\\';
					if (c == endc)
						lexungetc(c);
				}
			}
		}
		if (c == WEOF)
			awkerr(regx ? eofre : eofstr);
		*cp++ = c;
	}
	*cp = '\0';
	return (cp - linebuf);
}
Exemplo n.º 9
0
/*
 * Read an identifier.
 * Input is first character of identifier.
 * The name is collected in linebuf, looked up with vlook() and the
 * resulting node is stored in yylval.node.
 * Returns the keyword's own token for a KEYWORD node, VAR for ARRAY,
 * VAR and PARM nodes, and the node type for UFUNC, FUNC and GETLINE
 * nodes (a UFUNC yields VAR instead when funparm is set).  Any other
 * node type returns 0.
 */
static int
lexid(wint_t c)
{
	wchar_t *cp;
	size_t i;
	NODE *np;

	/* collect letters, digits and '_' into linebuf */
	cp = linebuf;
	do {
		*cp++ = c;
		c = lexgetc();
	} while (iswalpha(c) || iswdigit(c) || c == '_');
	*cp = '\0';
	/* give back the character that ended the identifier */
	lexungetc(c);
	yylval.node = np = vlook(linebuf);

	switch (np->n_type) {
	case KEYWORD:
		switch (np->n_keywtype) {
		case PRINT:
		case PRINTF:
			/* counted here; yylex() resets inprint at ';' and '}' */
			++inprint;
			/* FALLTHROUGH */
		default:
			return ((int)np->n_keywtype);
		}
		/* NOTREACHED */

	case ARRAY:
	case VAR:
		/*
		 * If reading the argument list, create a dummy node
		 * for the duration of that function. These variables
		 * can be removed from the symbol table at function end
		 * but they must still exist because the execution tree
		 * knows about them.
		 */
		if (funparm) {
			/* also entered by goto from the UFUNC case below */
do_funparm:
			np = emptynode(PARM, i = (cp-linebuf));
			np->n_flags = FSTRING;
			np->n_string = _null;
			np->n_strlen = 0;
			/* copy the name including its terminating NUL */
			(void) memcpy(np->n_name, linebuf,
			    (i+1) * sizeof (wchar_t));
			addsymtab(np);
			yylval.node = np;
		} else if (np == varNF || (np == varFS &&
		    (!doing_begin || begin_getline))) {
			/*
			 * If the user program references NF or sets
			 * FS either outside of a begin block or
			 * in a begin block after a getline then the
			 * input line will be split immediately upon read
			 * rather than when a field is first referenced.
			 */
			needsplit = 1;
		} else if (np == varENVIRON)
			needenviron = 1;
	/* FALLTHROUGH */
	case PARM:
		return (VAR);

	case UFUNC:
		/*
		 * It is ok to redefine functions as parameters
		 */
		if (funparm) goto do_funparm;
	/* FALLTHROUGH */
	case FUNC:
	case GETLINE:
		/*
		 * While inside a begin block, set the 'begin_getline' flag.
		 * This will force the 'needsplit' flag to be set, even inside
		 * a begin block, if FS is altered. (See VAR case above)
		 * Note that this runs for UFUNC and FUNC nodes as well as
		 * for GETLINE.
		 */
		if (doing_begin)
			begin_getline = 1;
		return (np->n_type);
	}
	/* NOTREACHED */
	return (0);
}
Exemplo n.º 10
0
/*
 * The lexical analyzer.
 *
 * Returns the next token.  A token saved in savetoken by an earlier
 * call is taken first; a pending regular expression delimiter
 * (redelim) is handed to lexregexp() and returned directly; otherwise
 * characters are read with lexgetc() and combined into tokens by the
 * first switch.  The second switch maintains catterm and replaces the
 * token with CONCAT (saving the real token in savetoken) where two
 * terms are adjacent.  Finally single characters are mapped to their
 * symbolic names through ctosym[].
 */
int
yylex()
{
	wint_t c, c1;
	int i;
	/* token held back for the next call (0 = none) */
	static int savetoken = 0;
	static int wasfield;
	static int isfuncdef;
	/* current nesting depth of {}, () and [] */
	static int nbrace, nparen, nbracket;
	/* single character -> symbolic token name, terminated by {0, 0} */
	static struct ctosymstruct {
		wint_t c, sym;
	} ctosym[] = {
		{ '|', BAR },		{ '^', CARAT },
		{ '~', TILDE },		{ '<', LANGLE },
		{ '>', RANGLE },	{ '+', PLUSC },
		{ '-', HYPHEN },	{ '*', STAR },
		{ '/', SLASH },		{ '%', PERCENT },
		{ '!', EXCLAMATION },	{ '$', DOLLAR },
		{ '[', LSQUARE },	{ ']', RSQUARE },
		{ '(', LPAREN },	{ ')', RPAREN },
		{ ';', SEMI },		{ '{', LBRACE },
		{ '}', RBRACE },	{   0, 0 }
	};

	if (savetoken) {
		/* deliver the token held back by the previous call */
		c = savetoken;
		savetoken = 0;
	} else if (redelim != '\0') {
		/*
		 * A regular expression is pending: read it now, keep the
		 * delimiter as the next token and skip the post-processing.
		 */
		c = redelim;
		redelim = 0;
		catterm = 0;
		savetoken = c;
		c = lexlast = lexregexp(c);
		goto out;
	} else while ((c = lexgetc()) != WEOF) {
		/* 'continue' skips the input read so far and reads on */
		if (iswalpha(c) || c == '_') {
			c = lexid(c);
		} else if (iswdigit(c) || c == '.') {
			c = lexnumber(c);
		} else if (isWblank(c)) {
			continue;
		} else switch (c) {
#if DOS || OS2
		case 032:		/* ^Z */
			continue;
#endif

		case '"':
			c = lexstring(c);
			break;

		case '#':
			/* comment: discard up to, not including, the newline */
			while ((c = lexgetc()) != '\n' && c != WEOF)
				;
			lexungetc(c);
			continue;

		case '+':
			if ((c1 = lexgetc()) == '+')
				c = INC;
			else if (c1 == '=')
				c = AADD;
			else
				lexungetc(c1);
			break;

		case '-':
			if ((c1 = lexgetc()) == '-')
				c = DEC;
			else if (c1 == '=')
				c = ASUB;
			else
				lexungetc(c1);
			break;

		case '*':
			if ((c1 = lexgetc()) == '=')
				c = AMUL;
			else if (c1 == '*') {
				/* "**" and "**=" are EXP and AEXP */
				if ((c1 = lexgetc()) == '=')
					c = AEXP;
				else {
					c = EXP;
					lexungetc(c1);
				}
			} else
				lexungetc(c1);
			break;

		case '^':
			if ((c1 = lexgetc()) == '=') {
				c = AEXP;
			} else {
				c = EXP;
				lexungetc(c1);
			}
			break;

		case '/':
			/*
			 * "/=" becomes ADIV unless the previous token was
			 * RE, NRE, ';', newline, ',' or '('.
			 */
			if ((c1 = lexgetc()) == '=' &&
			    lexlast != RE && lexlast != NRE &&
			    lexlast != ';' && lexlast != '\n' &&
			    lexlast != ',' && lexlast != '(')
				c = ADIV;
			else
				lexungetc(c1);
			break;

		case '%':
			if ((c1 = lexgetc()) == '=')
				c = AREM;
			else
				lexungetc(c1);
			break;

		case '&':
			if ((c1 = lexgetc()) == '&')
				c = AND;
			else
				lexungetc(c1);
			break;

		case '|':
			if ((c1 = lexgetc()) == '|')
				c = OR;
			else {
				lexungetc(c1);
				/* a single '|' is PIPE while inprint is set */
				if (inprint)
					c = PIPE;
			}
			break;

		case '>':
			if ((c1 = lexgetc()) == '=')
				c = GE;
			else if (c1 == '>')
				c = APPEND;
			else {
				lexungetc(c1);
				/* WRITE only outside parentheses while inprint is set */
				if (nparen == 0 && inprint)
					c = WRITE;
			}
			break;

		case '<':
			if ((c1 = lexgetc()) == '=')
				c = LE;
			else
				lexungetc(c1);
			break;

		case '!':
			if ((c1 = lexgetc()) == '=')
				c = NE;
			else if (c1 == '~')
				c = NRE;
			else
				lexungetc(c1);
			break;

		case '=':
			if ((c1 = lexgetc()) == '=')
				c = EQ;
			else {
				lexungetc(c1);
				c = ASG;
			}
			break;

		case '\n':
			/*
			 * A newline is dropped after the tokens listed
			 * below and turned into ';' otherwise.
			 */
			switch (lexlast) {
			case ')':
				if (catterm || inprint) {
					c = ';';
					break;
				}
			/* FALLTHROUGH */
			case AND:
			case OR:
			case COMMA:
			case '{':
			case ELSE:
			case ';':
			case DO:
				continue;

			case '}':
				if (nbrace != 0)
					continue;
				/* FALLTHROUGH */

			default:
				c = ';';
				break;
			}
			break;

		case ELSE:
			/* return ';' first and hold ELSE for the next call */
			if (lexlast != ';') {
				savetoken = ELSE;
				c = ';';
			}
			break;

		case '(':
			++nparen;
			break;

		case ')':
			if (--nparen < 0)
				awkerr(unbal, "()");
			break;

		case '{':
			nbrace++;
			break;

		case '}':
			if (--nbrace < 0) {
				char brk[3];

				brk[0] = '{';
				brk[1] = '}';
				brk[2] = '\0';
				awkerr(unbal, brk);
			}
			/* return ';' first and hold '}' for the next call */
			if (lexlast != ';') {
				savetoken = c;
				c = ';';
			}
			break;

		case '[':
			++nbracket;
			break;

		case ']':
			if (--nbracket < 0) {
				char brk[3];

				brk[0] = '[';
				brk[1] = ']';
				brk[2] = '\0';
				awkerr(unbal, brk);
			}
			break;

		case '\\':
			/* backslash-newline continues the line */
			if ((c1 = lexgetc()) == '\n')
				continue;
			lexungetc(c1);
			break;

		case ',':
			c = COMMA;
			break;

		case '?':
			c = QUEST;
			break;

		case ':':
			c = COLON;
			break;

		default:
			if (!iswprint(c))
				awkerr(
				    gettext("invalid character \"%s\""),
				    toprint(c));
			break;
		}
		/* a token was produced: leave the reading loop */
		break;
	}

	/*
	 * Post-process the token.  When catterm is set and the token is
	 * one of the cases that test it below, the token is held in
	 * savetoken and CONCAT is returned in its place.
	 */
	switch (c) {
	case ']':
		++catterm;
		break;

	case VAR:
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		} else if (!isfuncdef) {
			/* peek: catterm is not set when '(' follows */
			if ((c1 = lexgetc()) != '(')
				++catterm;
			lexungetc(c1);
		}
		isfuncdef = 0;
		break;

	case PARM:
	case CONSTANT:
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		} else {
			if (lexlast == '$')
				wasfield = 2;
			++catterm;
		}
		break;

	case INC:
	case DEC:
		if (!catterm || lexlast != CONSTANT || wasfield)
			break;

	/* FALLTHROUGH */
	case UFUNC:
	case FUNC:
	case GETLINE:
	case '!':
	case '$':
	case '(':
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		}
		break;

	case '}':
		/* at brace depth 0 a ';' is queued for the next call */
		if (nbrace == 0)
			savetoken = ';';
	/* FALLTHROUGH */
	case ';':
		inprint = 0;
	/* FALLTHROUGH */
	default:
		if (c == DEFFUNC)
			isfuncdef = 1;
		catterm = 0;
	}
	lexlast = c;
	/* wasfield counts down by one per token returned through here */
	if (wasfield)
		wasfield--;
	/*
	 * Map character constants to symbolic names.
	 */
	for (i = 0; ctosym[i].c != 0; i++)
		if (c == ctosym[i].c) {
			c = ctosym[i].sym;
			break;
		}
out:
#ifdef DEBUG
	if (dflag)
		(void) printf("%d\n", (int)c);
#endif
	return ((int)c);
}
Exemplo n.º 11
0
/*
 * Fetch the next token from L.
 *
 * skipSpace() supplies the first significant character (or a ready made
 * token, which is any value >= 127 and is returned unchanged).  The
 * character then selects a sub-parser: decimal, '$' hex, '%' binary and
 * '%%' quaternary numbers, identifiers, ':' / '.' prefixed local labels
 * inside a DAT block, multi-character operators and string literals.
 * Any other character is returned as is.
 *
 * The AST node belonging to the token (possibly NULL) is stored in
 * *ast_ptr and in last_ast.  Returns the token code.
 */
int
getToken(LexStream *L, AST **ast_ptr)
{
    AST *ast = NULL;
    /* sampled before skipSpace() can change L->eoln */
    int lineStart = (L->eoln == 1);
    int c;

    c = skipSpace(L, &ast);

    if (c >= 127) {
        *ast_ptr = last_ast = ast;
        return c;
    }

    if (safe_isdigit(c)) {
        /* decimal number; parseNumber may report a float instead */
        lexungetc(L,c);
        ast = NewAST(AST_INTEGER, NULL, NULL);
        c = parseNumber(L, 10, &ast->d.ival);
        if (c == T_FLOATNUM)
            ast->kind = AST_FLOAT;
    } else if (c == '$') {
        ast = NewAST(AST_INTEGER, NULL, NULL);
        c = parseNumber(L, 16, &ast->d.ival);
    } else if (c == '%') {
        int next;

        ast = NewAST(AST_INTEGER, NULL, NULL);
        next = lexgetc(L);
        if (next == '%') {
            c = parseNumber(L, 4, &ast->d.ival);
        } else {
            lexungetc(L, next);
            c = parseNumber(L, 2, &ast->d.ival);
        }
    } else if (isIdentifierStart(c)) {
        lexungetc(L, c);
        c = parseIdentifier(L, &ast, NULL);
        /* if in pasm, and at start of line, restart temporary
           labels */
        if (c == T_IDENTIFIER && InDatBlock(L) && lineStart) {
            L->lastGlobal = ast->d.string;
        }
    } else if (c == ':') {
        int next = lexgetc(L);

        if (next == '=') {
            c = T_ASSIGN;
        } else {
            /* decide before the character is pushed back */
            int isLocalLabel = (!gl_p2 && isIdentifierStart(next) && InDatBlock(L));

            lexungetc(L, next);
            if (isLocalLabel) {
                c = parseIdentifier(L, &ast, L->lastGlobal ? L->lastGlobal : "");
            }
        }
    } else if (gl_p2 && c == '.' && isIdentifierStart(lexpeekc(L)) && InDatBlock(L)) {
        c = parseIdentifier(L, &ast, L->lastGlobal ? L->lastGlobal : "");
    } else if (strchr(operator_chars, c) != NULL) {
        /* extend the operator for as long as it names a reserved word */
        char opname[6];
        int token = c;
        int len;
        Symbol *sym = NULL;

        opname[0] = c;
        for (len = 1; len < sizeof(opname)-1; len++) {
            int next = lexgetc(L);

            if (next >= 128 || strchr(operator_chars, next) == NULL) {
                lexungetc(L, next);
                break;
            }
            opname[len] = next;
            opname[len+1] = 0;
            sym = FindSymbol(&reservedWords, opname);
            if (!sym) {
                lexungetc(L, next);
                break;
            }
            token = INTVAL(sym);
        }
        c = token;
    } else if (c == '"') {
        c = parseString(L, &ast);
    }

    *ast_ptr = last_ast = ast;
    return c;
}
Exemplo n.º 12
0
/*
 * Skip blanks and comments and return the next significant character
 * or token.
 *
 * Spaces and tabs are skipped.  A ' comment runs to the end of the line
 * and a { } comment may nest; both are stored as AST_COMMENT nodes on
 * comment_chain.  A {{ ... }} comment does not nest and ends at the
 * first "}}".  {++ ... } returns T_ANNOTATION or T_INLINECCODE with the
 * text in an AST_ANNOTATION node passed back through *ast_ptr, and
 * {#line N file} updates the line counter and file name of L.
 *
 * At the start of a line inside a PUB or PRI block the function may
 * return T_INDENT or T_OUTDENT (after pushing the character back)
 * instead of the character.  Otherwise it returns T_EOLN at a newline
 * (and once before end of input), T_EOF when input ends inside a
 * { } comment, or the character read.
 */
int
skipSpace(LexStream *L, AST **ast_ptr)
{
    int c;
    int commentNest;
    int start_indent;
    struct flexbuf cb;
    AST *ast;
    int startcol = 0;
    int startline = 0;
    
    flexbuf_init(&cb, INCSTR);
    c = lexgetc(L);
again:
    /* re-entered after each { } comment and each skipped blank line */
    while (c == ' ' || c == '\t') {
        c = lexgetc(L);
    }

    /* ignore completely empty lines or ones with just comments */
    if (c == '\'') {
        /* leading quote characters are not part of the comment text */
        while (c == '\'') c = lexgetc(L);
        while (c != '\n' && c != T_EOF) {
            flexbuf_addchar(&cb, c);
            c = lexgetc(L);
        }
        flexbuf_addchar(&cb, '\n');
        flexbuf_addchar(&cb, 0);
        ast = NewAST(AST_COMMENT, NULL, NULL);
        ast->d.string = flexbuf_get(&cb);
        comment_chain = AddToList(comment_chain, ast);
    }
    if (c == '{') {
        struct flexbuf anno;
        int annotate = 0;
        int directive = 0;
	int doccomment = 0;
        
        /* remembered for the inline C test below */
        startcol = L->colCounter;
        startline = L->lineCounter;
        flexbuf_init(&anno, INCSTR);
        commentNest = 1;
        /* check for special comments {++... } which indicate 
           inline C code
           We also set up the preprocessor to emit {#line xx} directives when
           doing #include
        */
        c = lexgetc(L);
        if (c == '+') {
            c = lexgetc(L);
            if (c == '+') {
                annotate = 1;
                c = lexgetc(L);
            }
        } else if (c == '#') {
            c = lexgetc(L);
            directive = 1;
        } else if (c == '{') {
	    c = lexgetc(L);
	    doccomment = 1;
	}
        /* the first character of the body is re-read by the loop below */
        lexungetc(L, c);
        for(;;) {
            c = lexgetc(L);
            if (c == '{' && !doccomment)
                commentNest++;
            else if (c == '}') {
	        if (doccomment) {
	            /* only "}}" closes a {{ }} comment */
	            int peekc;
		    peekc = lexgetc(L);
		    if (peekc == '}') {
		        commentNest = 0;
		    } else {
		        lexungetc(L, peekc);
		    }
		} else {
		  --commentNest;
		}
	    }
            if (commentNest <= 0 || c == T_EOF) {
                break;
            }
            /* annotation/directive text and comment text go to separate buffers */
            if (annotate || directive) {
                flexbuf_addchar(&anno, c);
            } else {
                flexbuf_addchar(&cb, c);
            }
        }
        if (c == T_EOF) {
	    if (commentNest > 0)
	        fprintf(stderr, "WARNING: EOF seen inside comment\n");
            return c;
	}
        if (annotate) {
            /* this declaration shadows the function-level ast */
            AST *ast = NewAST(AST_ANNOTATION, NULL, NULL);
            flexbuf_addchar(&anno, '\0');
            ast->d.string = flexbuf_get(&anno);
            *ast_ptr = ast;
            // if this is indented and inside a PUB or PRI,
            // then treat it as inline C code
            if (startcol > 1 && startline > L->block_firstline && (L->in_block == T_PUB || L->in_block == T_PRI)) {
                return T_INLINECCODE;
            }
            return T_ANNOTATION;
        } else if (directive) {
            char *dir;
            flexbuf_addchar(&anno, '\0');
            dir = flexbuf_get(&anno);

            /* only "line <number> <file>" is acted on; other directives are dropped */
            if (!strncmp(dir, "line ", 5)) {
                char *ptr = dir+5;
                int lineno;
                lineno = strtol(ptr, &ptr, 10);
                if (lineno > 0) {
                    if (*ptr == ' ') ptr++;
                    /* copied because dir is freed below */
                    L->fileName = strdup(ptr);
                    L->lineCounter = lineno;
                }
            }
            free(dir);
        } else {
            flexbuf_addchar(&cb, '\0');
            ast = NewAST(AST_COMMENT, NULL, NULL);
            ast->d.string = flexbuf_get(&cb);
            comment_chain = AddToList(comment_chain, ast);
        }
        c = lexgetc(L);
        goto again;
    }

    /* indentation handling: only at the start of a line in PUB/PRI blocks */
    if (L->eoln && (L->in_block == T_PUB || L->in_block == T_PRI)) {
        /* skip blank lines */
        if (c == '\n') {
            c = lexgetc(L);
            goto again;
        }
        /* if there is a pending indent, send it back */
        if (L->pending_indent) {
            lexungetc(L, c);
            --L->pending_indent;
            return T_INDENT;
        }
        /* on EOF send as many OUTDENTS as we need */
        if (c == T_EOF) {
            if (L->indentsp > 0) {
                lexungetc(L, c);
                --L->indentsp;
                return T_OUTDENT;
            }
        }
        /* if our indentation is <= the start value, send back an outdent */
        start_indent = L->colCounter-1;
        if (start_indent <= L->indent[L->indentsp] && L->indentsp > 0) {
            lexungetc(L, c);
            --L->indentsp;
            return T_OUTDENT;
        }
    }
    // force an end-of line at EOF
    if (c == T_EOF && !L->eoln && !L->eof) {
        L->eof = L->eoln = 1;
        return T_EOLN;
    }
    /* first character of a new line: record its column */
    if (L->eoln) {
        L->eoln = 0;
        L->firstNonBlank = L->colCounter-1;
    }
    if (c == '\n') {
        L->eoln = 1;
        return T_EOLN;
    }
    /* the first time this point is reached for `current`, attach the comments collected so far */
    if (current && !current->sawToken) {
        current->sawToken = 1;
        current->topcomment = GetComments();
    }
    return c;
}
Exemplo n.º 13
0
/*
 * Parse an identifier.
 *
 * Reads identifier characters from L.  When `prefix` is not NULL the
 * name is built as prefix, a separator ('.' if gl_gas_dat is set, ':'
 * otherwise) and the characters read.
 *
 * The name is then classified:
 *   - inside a DAT block, names found in pasmWords give T_INSTR or
 *     T_INSTRMODIFIER;
 *   - names found in reservedWords give the reserved word's own token
 *     (after updating block / indentation state in L) or T_HWREG;
 *     builtins and constants are treated as plain identifiers;
 *   - everything else gives T_IDENTIFIER with the name stored in an
 *     AST_IDENTIFIER node.
 *
 * The resulting AST node is stored in *ast_ptr.  Returns the token code.
 *
 * The name buffer is released only on the paths that return without
 * keeping it.  The two "Internal error" paths continue to use the name,
 * so it must still be valid there (it used to be freed first, which was
 * a use-after-free).
 */
static int
parseIdentifier(LexStream *L, AST **ast_ptr, const char *prefix)
{
    int c;
    struct flexbuf fb;
    Symbol *sym;
    AST *ast = NULL;
    int startColumn = L->colCounter - 1;
    char *idstr;

    flexbuf_init(&fb, INCSTR);
    if (prefix) {
        flexbuf_addmem(&fb, prefix, strlen(prefix));
        if (gl_gas_dat) {
            flexbuf_addchar(&fb, '.');
        } else {
            flexbuf_addchar(&fb, ':');
        }
    }
    c = lexgetc(L);
    while (isIdentifierChar(c)) {
        //flexbuf_addchar(&fb, tolower(c));
        flexbuf_addchar(&fb, c);
        c = lexgetc(L);
    }
    // add a trailing 0, and make sure there is room for an extra
    // character in case the name mangling needs it
    flexbuf_addchar(&fb, '\0');
    flexbuf_addchar(&fb, '\0');
    idstr = flexbuf_get(&fb);
    /* give back the character that ended the identifier */
    lexungetc(L, c);

    /* check for reserved words */
    if (InDatBlock(L)) {
        sym = FindSymbol(&pasmWords, idstr);
        if (sym) {
            if (sym->type == SYM_INSTR) {
                free(idstr);
                ast = NewAST(AST_INSTR, NULL, NULL);
                ast->d.ptr = sym->val;
                *ast_ptr = ast;
                return T_INSTR;
            }
            if (sym->type == SYM_INSTRMODIFIER) {
                free(idstr);
                ast = NewAST(AST_INSTRMODIFIER, NULL, NULL);
                ast->d.ptr = sym->val;
                *ast_ptr = ast;
                return T_INSTRMODIFIER;
            }
            /* idstr is still needed by the lookups below */
            fprintf(stderr, "Internal error: Unknown pasm symbol type %d\n", sym->type);
        }
    }
    sym = FindSymbol(&reservedWords, idstr);
    if (sym != NULL) {
        if (sym->type == SYM_BUILTIN)
        {
            /* run any parse hooks */
            Builtin *b = (Builtin *)sym->val;
            if (b && b->parsehook) {
                (*b->parsehook)(b);
            }
            goto is_identifier;
        }
        if (sym->type == SYM_CONSTANT
            || sym->type == SYM_FLOAT_CONSTANT)
        {
            goto is_identifier;
        }
        if (sym->type == SYM_RESERVED) {
            free(idstr);
            c = INTVAL(sym);
            /* check for special handling */
            switch(c) {
            case T_PUB:
            case T_PRI:
            case T_DAT:
            case T_OBJ:
            case T_VAR:
            case T_CON:
                /* a new block starts here */
                L->in_block = c;
                L->block_firstline = L->lineCounter;
                //EstablishIndent(L, 1);
                break;
            case T_ASM:
                /* remember the enclosing block so T_ENDASM can restore it */
                if (L->in_block == T_ASM) {
                    fprintf(stderr, "WARNING: ignoring nested asm\n");
                } else {
                    L->save_block = L->in_block;
                }
                L->in_block = c;
                break;
            case T_ENDASM:
                L->in_block = L->save_block;
                break;
            case T_IF:
            case T_IFNOT:
            case T_ELSE:
            case T_ELSEIF:
            case T_ELSEIFNOT:
            case T_REPEAT:
            case T_CASE:
                EstablishIndent(L, startColumn);
                break;
            default:
                break;
            }
            if (!ast)
                ast = GetComments();
            *ast_ptr = ast;
            return c;
        }
        if (sym->type == SYM_HWREG) {
            free(idstr);
            ast = NewAST(AST_HWREG, NULL, NULL);
            ast->d.ptr = sym->val;
            *ast_ptr = ast;
            return T_HWREG;
        }
        /* idstr is still needed: fall through and treat it as an identifier */
        fprintf(stderr, "Internal error: Unknown symbol type %d\n", sym->type);
    }

is_identifier:
    ast = NewAST(AST_IDENTIFIER, NULL, NULL);
    /* make sure identifiers do not conflict with C keywords */
    if (gl_normalizeIdents || Is_C_Reserved(idstr)) {
        NormalizeIdentifier(idstr);
    }
    /* the node takes over the name buffer */
    ast->d.string = idstr;
    *ast_ptr = ast;
    return T_IDENTIFIER;
}
Exemplo n.º 14
0
/*
 * actual parsing functions
 */
/*
 * parseNumber: read an unsigned numeric literal in the given base from L.
 *
 * Digits 0-9 and letters a-z / A-Z (value 10 and up) are accepted as long
 * as their value is below `base`; underscores between digits are skipped.
 * In base 10 a following '.', 'e' or 'E' switches to floating-point
 * parsing (fraction and optional signed exponent).
 *
 * The character that terminated the number is pushed back onto the stream.
 *
 * Parameters:
 *   L    - stream to read characters from
 *   base - numeric base of the integer part
 *   num  - receives the integer value, or for a float the result of
 *          floatAsInt() on the parsed value
 *
 * Returns T_NUM or T_FLOATNUM when at least one integer digit was seen;
 * otherwise T_HERE for base 16 and '%' for any other base.
 */
static int
parseNumber(LexStream *L, unsigned int base, uint32_t *num)
{
    unsigned long uval, digit;
    unsigned int c;
    int sawdigit = 0;
    int kind = T_NUM;

    uval = 0;

    /* integer part: accumulate digits until one is invalid for this base */
    for(;;) {
        c = lexgetc(L);
        if (c == '_')
            continue;
        else if (c >= 'A' && c <= 'Z')
            digit = 10 + c - 'A';
        else if (c >= 'a' && c <= 'z')
            digit = 10 + c - 'a';
        else if (c >= '0' && c <= '9') {
            digit = (c - '0');
        } else {
            break;
        }
        if (digit < base) {
            uval = base * uval + digit;
            sawdigit = 1;
        } else {
            break;
        }
    }
    if ( base == 10 && (c == '.' || c == 'e' || c == 'E') ) {
        /* potential floating point number */
        float f = (float)uval;
        float ff = 0.0;
        /* divby[i] == 10^-(i+1); used for fraction digits and small negative exponents */
        static const float divby[45] = {
            1e-1f, 1e-2f, 1e-3f, 1e-4f, 1e-5f,
            1e-6f, 1e-7f, 1e-8f, 1e-9f, 1e-10f,
            1e-11f, 1e-12f, 1e-13f, 1e-14f, 1e-15f,
            1e-16f, 1e-17f, 1e-18f, 1e-19f, 1e-20f,
            1e-21f, 1e-22f, 1e-23f, 1e-24f, 1e-25f,
            1e-26f, 1e-27f, 1e-28f, 1e-29f, 1e-30f,
            1e-31f, 1e-32f, 1e-33f, 1e-34f, 1e-35f,
            1e-36f, 1e-37f, 1e-38f, 1e-39f, 1e-40f,
            1e-41f, 1e-42f, 1e-43f, 1e-44f, 1e-45f,
        };
        const int ndivby = (int)(sizeof(divby) / sizeof(divby[0]));
        int counter = 0;
        int exponent = 0;

        if (c == '.') {
            c = lexgetc(L);
            if ( c != 'e' && c != 'E' && (c < '0' || c > '9')) {
                /*
                 * '.' not followed by a digit or exponent: this is an
                 * integer followed by a separate '.' token. Push back the
                 * lookahead here and the '.' itself at donefloat.
                 */
                lexungetc(L, c);
                c = '.';
                goto donefloat;
            }
        }
        /* fraction digits */
        while (c >= '0' && c <= '9') {
            /*
             * Digits beyond the table are too small to affect a float;
             * consume them without indexing past the end of divby[].
             */
            if (counter < ndivby) {
                ff = ff + divby[counter]*(float)(c-'0');
                counter++;
            }
            c = lexgetc(L);
        }
        if (c == 'e' || c == 'E') {
            int expval = 0;
            int neg = 1;
            c = lexgetc(L);
            if (c == '+') {
                c = lexgetc(L);
            } else if (c == '-') {
                c = lexgetc(L);
                neg = -neg;
            }
            while (c >= '0' && c <= '9') {
                /*
                 * Saturate instead of overflowing the signed int; any
                 * exponent this large is already far outside float range.
                 */
                if (expval < 10000)
                    expval = 10*expval + (c - '0');
                c = lexgetc(L);
            }
            if (neg < 0)
                expval = -expval;
            exponent += expval;
        }
        f = f + ff;
        if (exponent < 0 && exponent >= -45) {
            /* small negative exponents come straight from the table */
            f *= divby[-(exponent+1)];
        } else if (exponent != 0) {
            f *= powf(10.0f, (float)exponent);
        }
        /* presumably the raw bit pattern of f -- confirm against floatAsInt() */
        uval = floatAsInt(f);
        kind = T_FLOATNUM;
    }
donefloat:
    /* return the terminating character to the stream */
    lexungetc(L, c);
    *num = uval;
    return sawdigit ? kind : ((base == 16) ? T_HERE : '%');
}
Exemplo n.º 15
0
yylex()
{
	int k;
	STRING synfind(),strsave(),getident(),getword2(),getstring(),getword();
	float getnum();
	while(iswhite(c=lexgetc()));
	startoflex = curr_index;
	if((isalpha(c)||c=='@' )&& c != EOF) {
		s = getident(c);
		if(strcmp("include",s)==0){
			inclusion();
			return(yylex());
		}else if((k=keyfind(synfind(s)))!=NKEYWORDS && const_list == 0){
			   yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			   yylval.resptr->strg =  keywords[k].keyname;
			   yylval.resptr->line = in_files[in_index].in_line;
			   yylval.resptr->len  = startoflex;
				return(keywords[k].keyret);
		}else{ if (const_list != 0)
			{
			  yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			  yylval.resptr->strg = s;
			  yylval.resptr->line = in_files[in_index].in_line;
			  yylval.resptr->len  =
			  startoflex;
			  return(WORD);
			}
			else
			{
			  yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			  yylval.resptr->strg = s;
			  yylval.resptr->line = in_files[in_index].in_line;
			  yylval.resptr->len  =
			  startoflex;
			  return(IDENT);
			}
		     }
       }else if((isdigit(c) || c == '~') && c !=EOF) {
		yylval.numb = (float) getnum(c);
		return(CONST);
	}else{
		switch(c){
		case '+':
			yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			if (const_list!=0) {
			     yylval.resptr->strg=getword2(c);
			   yylval.resptr->line = in_files[in_index].in_line;
			   yylval.resptr->len  =
			  startoflex;
			     return(WORD); }
			yylval.resptr->strg = "plus";
			yylval.resptr->line = in_files[in_index].in_line;
			yylval.resptr->len  =
			  startoflex;
			return(ADDOP);
		case '-':
			yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			if (const_list!=0) {
			     yylval.resptr->strg=getword2(c);
			   yylval.resptr->line = in_files[in_index].in_line;
			     yylval.resptr->len  =
			  startoflex;
			     return(WORD); }
			yylval.resptr->strg =  "minus";
			   yylval.resptr->line = in_files[in_index].in_line;
			yylval.resptr->len  =
			  startoflex;
			return(ADDOP);
		case '*':
			yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			if (const_list!=0) {
			     yylval.resptr->strg=getword2(c);
			   yylval.resptr->line = in_files[in_index].in_line;
			     yylval.resptr->len  =
			  startoflex;
			     return(WORD); }
			c=lexgetc();
			yylval.resptr->line = in_files[in_index].in_line;
			if (c=='*') { yylval.resptr->strg =  "exp";
				      yylval.resptr->len  = startoflex;
				      return(EXP); }
			peekc = c;
			yylval.resptr->strg =  "times";
			yylval.resptr->len  = startoflex;
			return(MULOP);
		case '/': c = lexgetc();
			if (const_list!=0) {
			     peekc = c;
			     yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			     yylval.resptr->strg=getword2('/');
			   yylval.resptr->line = in_files[in_index].in_line;
			     yylval.resptr->len  =
			  startoflex;
			     return(WORD); }
			if (c=='/'){
			while((c=lexgetc())!='\n');
			peekc = c;
			return(yylex()); }
			peekc = c;
			yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			yylval.resptr->strg =  "fdiv";
			   yylval.resptr->line = in_files[in_index].in_line;
			yylval.resptr->len  =
			  startoflex;
			return(MULOP);
		case '^':
			yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			if (const_list!=0) {
			     yylval.resptr->strg=getword2(c);
			   yylval.resptr->line = in_files[in_index].in_line;
			     yylval.resptr->len  =
			  startoflex;
			     return(WORD); }
			yylval.resptr->strg ="strconc";
			   yylval.resptr->line = in_files[in_index].in_line;
			yylval.resptr->len  =
			  startoflex;
			return(STRCONC);
		case '#':
			if (const_list!=0) {
			     yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			     yylval.resptr->strg=getword2(c);
			   yylval.resptr->line = in_files[in_index].in_line;
			     yylval.resptr->len  =
			  startoflex;
			     return(WORD); }
			return(c);
		case '"':
			yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			   yylval.resptr->line = in_files[in_index].in_line;
			if (const_list!=0) {
			     yylval.resptr->strg=getword2(c);
			     yylval.resptr->len  =
			  startoflex;
			     return(WORD); }
			c = lexgetc() ;
			yylval.resptr->strg = getword(c);
			yylval.resptr->len  =
			  startoflex;
			return(WORD);
		case '`':
			yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			   yylval.resptr->line = in_files[in_index].in_line;
			c = lexgetc();
			yylval.resptr->strg =  getstring(c);
			yylval.resptr->len  =
			  startoflex;
			return(STRING_QUOTED);
		case ':':
			yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			   yylval.resptr->line = in_files[in_index].in_line;
			   yylval.resptr->len  = startoflex;
			if (const_list!=0) {
			     yylval.resptr->strg = getword(c);
			     return(WORD); }
			c = lexgetc();
			if (c==':') { yylval.resptr->strg = "cons";
					return(CONS); } else {
					peekc=c;
					return(COLON);}
		case '[': c = lexgetc();
			if (c=='%' && const_list!=0) {
			     peekc = c;
			     yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			   yylval.resptr->line = in_files[in_index].in_line;
			     yylval.resptr->strg=getword2('[');
			     yylval.resptr->len  =
			  startoflex;
			     return(WORD); }
			if (c == '%') { return(CLSTART);
				      }
				      else
				      { peekc = c;
					const_list++;
					return(LSTART);
				      }
		case '%':c = lexgetc();
			if ((c==']' || c==')') && const_list!=0) {
			     peekc = c;
			     yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			   yylval.resptr->line = in_files[in_index].in_line;
			     yylval.resptr->strg=getword2('%');
			     yylval.resptr->len  =
			  startoflex;
			     return(WORD); }
			  if (c == ']')
				       {  return(CLEND);
				       }
				       else
				       {
				       my_yyerror("incorrect use of %","cannot continue compliation ");
					 my_exit(1);
				       }
		case ']':
				       { const_list--;
					 return(LEND);
				       }
		case '.':
			if (const_list!=0) {
			     yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			   yylval.resptr->line = in_files[in_index].in_line;
			     yylval.resptr->strg=getword2(c);
			     yylval.resptr->len  =
			  startoflex;
			     return(WORD); }
			return(DOT);
		case '<':
			yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			   yylval.resptr->line = in_files[in_index].in_line;
			if (const_list!=0) {
			     yylval.resptr->strg=getword2(c);
			     yylval.resptr->len  =
			  startoflex;
			     return(WORD); }
			c=lexgetc();
					if (c=='=') {
						   yylval.resptr->strg =  "le";
						   yylval.resptr->len  =
			  startoflex;
						     return(RELOP); } else
					if (c =='>') {
						 yylval.resptr->strg = "append";
						 yylval.resptr->len  =
			  startoflex;
						   return(APPEND); } else {
						     peekc=c;
						     yylval.resptr->strg = "lt";
						     yylval.resptr->len  =
			  startoflex;
						     return(RELOP);  }
		case '>':
			yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			   yylval.resptr->line = in_files[in_index].in_line;
			if (const_list!=0) {
			     yylval.resptr->strg=getword2(c);
			     yylval.resptr->len  =
			  startoflex;
			     return(WORD); }
			c=lexgetc();
			if (c=='=') { yylval.resptr->strg =  "ge";
				      yylval.resptr->len  =
			  startoflex;
					return(RELOP); } else {
						 yylval.resptr->strg = "gt";
						 yylval.resptr->len  =
			  startoflex;
						 peekc=c;
						 return(RELOP); }
		case '&':
		case ',':
		case ';':
		case '=':
		case '$':
		case '(':
		case ')':
			if (const_list!=0) {
			     yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			   yylval.resptr->line = in_files[in_index].in_line;
			     yylval.resptr->strg=getword2(c);
			     yylval.resptr->len  =
			  startoflex;
			     return(WORD); }
			 if (c==',') {
			     yylval.resptr = (RESPTR) calloc(1,sizeof(RES));
			     yylval.resptr->line = in_files[in_index].in_line;
			     yylval.resptr->len  = startoflex;
			     return(COMMA); }
		default:
			return(c);
		}
	}
}