/*
** Raises a semantic error for a 'goto' that was never matched to a label.
** When the pending label name is a reserved word (which can only be
** 'break'), the "not inside a loop" wording is used instead.
*/
static l_noret undefgoto (LexState *ls, Labeldesc *gt) {
  const char *fmt;
  if (isreserved(gt->name))
    fmt = "<%s> at line %d not inside a loop";
  else
    fmt = "no visible label '%s' for <goto> at line %d";
  /* format the message with the label name and line, then report it */
  semerror(ls, luaO_pushfstring(ls->L, fmt, getstr(gt->name), gt->line));
}
/*
 * Reports a semantic error for a 'goto' that was never matched to a label.
 * When the pending label name is a reserved word (which can only be
 * 'break'), the "not inside a loop" wording is used instead.
 */
static void undefgoto(ktap_lexstate *ls, ktap_labeldesc *gt)
{
	const char *fmt;

	if (isreserved(gt->name))
		fmt = "<%s> at line %d not inside a loop";
	else
		fmt = "no visible label " KTAP_QS " for <goto> at line %d";

	/* format the message with the label name and line, then report it */
	semerror(ls, ktapc_sprintf(fmt, getstr(gt->name), gt->line));
}
/*
 * Returns a pointer to the first character after the search ("?...") part.
 * Input: the result of skip_path().  If the string does not start with '?',
 * it is returned unchanged.
 */
static const char *
skip_search(const char *uri)
{
    const char *p = uri;

    if (*p != '?')
        return p;

    p++;    /* step over the '?' */
    while (isreserved(*p) || isunreserved(*p)
           || (*p == '%' && ishex(p[1]) && ishex(p[2]))) {
        /* a '%' is stepped over together with the two characters after it */
        if (*p == '%')
            p += 3;
        else
            p += 1;
    }
    return p;
}
/*
** Main scanner: reads characters from 'ls' until one complete token has
** been recognized and returns its code (a single character value or a
** TK_* constant).  Strings, names and numerals deliver their value
** through 'seminfo'.  Line breaks, blanks and comments are skipped.
*/
static int llex (LexState *ls, SemInfo *seminfo) {
  luaZ_resetbuffer(ls->buff);
  while (1) {
    switch (ls->current) {
      /* line breaks */
      case '\n': case '\r': {
        inclinenumber(ls);
        break;
      }
      /* spaces */
      case ' ': case '\f': case '\t': case '\v': {
        next(ls);
        break;
      }
      /* '-' or '--' (comment) */
      case '-': {
        next(ls);
        if (ls->current != '-')
          return '-';
        /* it is a comment */
        next(ls);
        if (ls->current == '[') {  /* maybe a long comment */
          int sep = skip_sep(ls);
          luaZ_resetbuffer(ls->buff);  /* 'skip_sep' may dirty the buffer */
          if (sep >= 0) {
            read_long_string(ls, NULL, sep);  /* skip long comment */
            luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */
            break;
          }
        }
        /* short comment: skip until end of line (or end of file) */
        while (!(currIsNewline(ls) || ls->current == EOZ))
          next(ls);
        break;
      }
      /* long string or simply '[' */
      case '[': {
        int sep = skip_sep(ls);
        if (sep >= 0) {
          read_long_string(ls, seminfo, sep);
          return TK_STRING;
        }
        if (sep != -1)  /* '[=...' missing second bracket */
          lexerror(ls, "invalid long string delimiter", TK_STRING);
        return '[';
      }
      case '=': {
        next(ls);
        return check_next1(ls, '=') ? TK_EQ : '=';
      }
      case '<': {
        next(ls);
        if (check_next1(ls, '='))
          return TK_LE;
        if (check_next1(ls, '<'))
          return TK_SHL;
        return '<';
      }
      case '>': {
        next(ls);
        if (check_next1(ls, '='))
          return TK_GE;
        if (check_next1(ls, '>'))
          return TK_SHR;
        return '>';
      }
      case '/': {
        next(ls);
        return check_next1(ls, '/') ? TK_IDIV : '/';
      }
      case '~': {
        next(ls);
        return check_next1(ls, '=') ? TK_NE : '~';
      }
      case ':': {
        next(ls);
        return check_next1(ls, ':') ? TK_DBCOLON : ':';
      }
      /* short literal strings */
      case '"': case '\'': {
        read_string(ls, ls->current, seminfo);
        return TK_STRING;
      }
      /* relative paths: backtick-delimited, read like a short string */
      case '`': {
        read_string(ls, ls->current, seminfo);
        return TK_PATH;
      }
      /* '.', '..', '...', or number */
      case '.': {
        save_and_next(ls);
        if (check_next1(ls, '.'))
          return check_next1(ls, '.') ? TK_DOTS      /* '...' */
                                      : TK_CONCAT;   /* '..' */
        if (lisdigit(ls->current))
          return read_numeral(ls, seminfo);
        return '.';
      }
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9': {
        return read_numeral(ls, seminfo);
      }
      case EOZ: {
        return TK_EOS;
      }
      default: {
        TString *word;
        if (!lislalpha(ls->current)) {  /* single-char tokens (+ - / ...) */
          int ch = ls->current;
          next(ls);
          return ch;
        }
        /* identifier or reserved word: collect all alphanumeric chars */
        do {
          save_and_next(ls);
        } while (lislalnum(ls->current));
        word = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                  luaZ_bufflen(ls->buff));
        seminfo->ts = word;
        if (isreserved(word))  /* reserved word? */
          return word->extra - 1 + FIRST_RESERVED;
        return TK_NAME;
      }
    }
  }
}
int lex (char **input) { short comma; int state; int ret; comma = 0; state = STATE_START; ret = INVALIDSYM; if (!**input) return END; lp1 = *input; while (state != STATE_END && **input) { switch (state) { case STATE_START: if (**input == DEREF_SYM) { state = STATE_DEREF; *input += 1; } else if (**input == CHARC_OPEN_SYM) { state = trans(literal, input, CHARC_OPEN, STATE_END, &ret); if (!literal) literal = 1; } else if (**input == CHARC_CLOSE_SYM) { state = STATE_END; ret = CHARC_CLOSE; literal = 0; *input += 1; } else if (**input == REP_OPEN_SYM) { state = trans(literal, input, 0, STATE_REP, NULL); } else if (**input == REP_CLOSE_SYM) { state = trans(literal, input, RXVM_BADREP, STATE_END, &ret); } else if (**input == LPAREN_SYM) { state = trans(literal, input, LPAREN, STATE_END, &ret); } else if (**input == RPAREN_SYM) { state = trans(literal, input, RPAREN, STATE_END, &ret); } else if (**input == ONE_SYM) { state = trans(literal, input, ONE, STATE_END, &ret); } else if (**input == ONEZERO_SYM) { state = trans(literal, input, ONEZERO, STATE_END, &ret); } else if (**input == ZERO_SYM) { state = trans(literal, input, ZERO, STATE_END, &ret); } else if (**input == ANY_SYM) { state = trans(literal, input, ANY, STATE_END, &ret); } else if (**input == SOL_SYM) { state = trans(literal, input, SOL, STATE_END, &ret); } else if (**input == EOL_SYM) { state = trans(literal, input, EOL, STATE_END, &ret); } else if (**input == ALT_SYM) { state = trans(literal, input, ALT, STATE_END, &ret); } else if (isprintable(**input)) { state = STATE_LITERAL; } else { return RXVM_EINVAL; } break; case STATE_LITERAL: if (**input == RANGE_SEP_SYM && literal) { return RXVM_EINVAL; } else if (*(*input + 1) == RANGE_SEP_SYM && literal) { state = STATE_RANGE; *input += 2; } else { state = STATE_END; ret = LITERAL; *input += 1; } break; case STATE_DEREF: if (!**input) { return RXVM_EINVAL; } else { ret = LITERAL; lp1 += 1; } *input += 1; state = STATE_END; break; case STATE_RANGE: if 
(isprintable(**input) && !isreserved(**input)) { state = STATE_END; ret = CHAR_RANGE; *input += 1; } else { return RXVM_EINVAL; } break; case STATE_REP: if (**input == REP_CLOSE_SYM) { if (!isdigit(*(*input - 1)) && !isdigit(*(lp1 + 1))) { return RXVM_MREP; } state = STATE_END; ret = REP; } else if (**input == ',') { if (comma) return RXVM_EINVAL; comma = 1; } else if (!isdigit(**input)) { return RXVM_EINVAL; } *input += 1; break; } } if (state == STATE_DEREF) { ret = RXVM_ETRAIL; } else if (state == STATE_REP) { ret = RXVM_EREP; } lpn = *input; return ret; }
// sets token and token_type to reflect next occuring token int get_token() { char test[3]; register char *temp; int x; lasttype = token_type; token_type = 0; temp = token; for (x=0; x<TOKEN_LEN; x++) token[x] = '\0'; while(iswhite(*prog)) { tokenpos++; prog++; } if(*prog == '?') { prog++; tokenpos++; *temp++ = 'P'; *temp++ = 'R'; *temp++ = 'I'; *temp++ = 'N'; *temp++ = 'T'; return(token_type = TOK_COMMAND); } if((*prog == '\'') && (lasttype == 1)) { prog++; tokenpos++; *temp++ = 'E'; *temp++ = 'D'; *temp++ = 'I'; *temp++ = 'T'; return(token_type = TOK_COMMAND); } /* if((*prog == '.') && (lasttype == 1)) { prog++; tokenpos++; *temp++ = 'P'; *temp++ = 'B'; *temp++ = 'S'; *temp++ = 'T'; *temp++ = 'E'; *temp++ = 'P'; return(token_type = TOK_COMMAND); }*/ if(strchr("\n", *prog)) { *temp = 0; return(token_type = TOK_DONE); } if(*prog == '\'') { prog++; tokenpos++; while (*prog != '\'' && *prog != '\r') { if (strchr("\n", *prog)) return (token_type = TOK_ERROR); *temp++ = *prog++; } prog++; tokenpos++; *temp = 0; return (token_type = TOK_MNEMONIC); } if(*prog == '\"') { prog++; tokenpos++; while (*prog != '\"' && *prog != '\r') { if (strchr("\n", *prog)) return (token_type = TOK_ERROR); *temp++ = *prog++; } prog++; tokenpos++; *temp = 0; return(token_type = TOK_STRING); } if(isoper(*prog)) { if (*prog == ',') { prog++; *temp = 0; return(token_type = TOK_COMMA); } if (*prog == ':') { prog++; *temp = 0; return(token_type = TOK_COLON); } if (*prog == ';') { prog++; *temp = 0; return(token_type = TOK_SEMICOLON); } if (strchr("*<>=", *prog)) { *temp = *prog; prog++; temp++; if (strchr("*<>=", *prog)) { *temp = *prog; prog++; temp++; if (!strcmp(token, "<<") || !strcmp(token, ">>") || !strcmp(token, "*<") || !strcmp(token, "*>") || !strcmp(token, "*=") || !strcmp(token, "<*") || !strcmp(token, ">*") || !strcmp(token, "=*") || !strcmp(token, "==")) { return (token_type = TOK_ERROR); } } *temp = 0; } else { *temp = *prog; prog++; temp++; *temp = 0; } return(token_type = 
TOK_OPERATOR); } if(isdigit(*prog) || *prog == '.') { while(!isoper(*prog) && !iswhite(*prog)) *temp++ = *prog++; *temp = 0; return(token_type = TOK_NUMBER); } while(!isoper(*prog) && !iswhite(*prog) && *prog != '"' && *prog != '\'') *temp++ = *prog++; if (islongoper(token) && get_fnc(token)) { while (iswhite(*prog)) *temp++ = *prog++; if (*prog == '(') { while (iswhite(*prog)) *temp++ = *prog++; prog++; if (isdigit(*prog)) goto blah; else if ((*prog == '"') || (*prog == '$')) { for (x=strlen(token); x>=3; x--) prog--; for (x=3; x<strlen(token)+1; x++) token[x] = '\0'; return (token_type = TOK_FUNCTION); } else if (isalpha(*prog)) { while (isalpha(*prog)) *temp++ = *prog++; if (*prog == '$') { for (x=strlen(token); x>=3; x--) prog--; for (x=3; x<strlen(token)+1; x++) token[x] = '\0'; return (token_type = TOK_FUNCTION); } else if (*prog == '(') { // test if valid function or sysvar // if succeeded, test function/sysvar return type for strings test[0] = toupper(token[strlen(token)-3]); test[1] = toupper(token[strlen(token)-2]); test[2] = toupper(token[strlen(token)-1]); test[3] = '\0'; if (get_fnc(test) || get_sysvar(test)) { for (x=strlen(token); x>=3; x--) prog--; for (x=3; x<strlen(token)+1; x++) token[x] = '\0'; return (token_type = TOK_FUNCTION); } else goto blah; } else goto blah; } else goto blah; } else { blah: for (x=0; !iswhite(token[x]); x++) ; for (x=x; x<strlen(token)+1; x++) token[x] = '\0'; return (token_type = TOK_OPERATOR); } } else { // !!HACK!! // will decide if this is a sysvar or operator the next time // gettoken is run. if next token is '=' then it's an operator // if so it will change the metacode for this run. 
if (!strcmp(token, "ERR")) { checkerr = 2; return(token_type = TOK_VARIABLE); //SYSVAR); } if (islongoper(token)) return(token_type = TOK_OPERATOR); if (isreserved(token)) return(token_type = TOK_RESERVED); if (((token[0] == 'F') || (token[0] == 'f')) && ((token[1] == 'N') || (token[1] == 'n'))) return (token_type = TOK_USERFUNCTION); if (get_opcode(token)) return(token_type = TOK_COMMAND); if (get_fnc(token)) return (token_type = TOK_FUNCTION); else return(token_type = TOK_VARIABLE); } return 0; }
rebound lex()//词法分析 { int i,k=0; rebound re; re.Str[0]='\0'; while(c==' '||c=='\t') GETCHAR(); if(isalpha(c)||c =='_') { re.Str[k++]=c; GETCHAR(); while(isalpha(c)||isdigit(c)||c =='_') { if(k<VARLENGTH) re.Str[k++]=c; GETCHAR(); } if (k>VARLENGTH) // 判断标识符是否超过指定长度 { Error(lineNo,"Identifer is too long!"); re.Str[VARLENGTH]='\0'; } else re.Str[k]='\0'; if((i=isreserved(re.Str))!=-1) re.type=i; else re.type=IDENTF; } else if(isdigit(c)) { if(c!='0') { while (isdigit(c)) { if (k<STRINGLENGTH) re.Str[k++]=c; else k++; GETCHAR(); } } else { GETCHAR(); if(isdigit(c)||isalpha(c)) { Error(lineNo,"Illegal number in this line!"); while(isdigit(c)||isalpha(c)) GETCHAR(); } re.Str[k++]='0'; } re.type=INTEGER; if (c=='.') { // 判断是浮点数还是整数 re.Str[k++]=c; GETCHAR(); if(!isdigit(c)) re.Str[k]='\0'; while (isdigit(c)) { if (k<STRINGLENGTH) re.Str[k++]=c; else k++; GETCHAR(); } re.type=FLOAT; } if (k<STRINGLENGTH) { if(re.Str[k-1]=='.')//处理 1.空 这种数字 re.Str[k-1]='\0'; else re.Str[k]='\0'; } else re.Str[STRINGLENGTH]='\0'; if ((re.intval=atoi(re.Str))>INTLENGTH) // 判断整数是否超出范围 Error(lineNo,"The integer in your program is out of range!"); if ((re.realval=(float)atof(re.Str))>FLOATLENGTH) // 判断浮点数是否超出范围 Error(lineNo,"The realnum in your program is out of range!"); } else switch (c) { case '\'': // 处理字符常量 GETCHAR(); re.ch=c; re.type=CHAR; GETCHAR(); if (c!='\'') Error(lineNo," The char is lack of \' "); else GETCHAR(); break; case '"': GETCHAR(); while (c!='"') { if (k<STRINGLENGTH) re.Str[k++]=c; else k++; GETCHAR(); } if (c=='"') GETCHAR(); if (k<=STRINGLENGTH) // 判断字符串是否过长 re.Str[k]='\0'; else { Error(lineNo,"The string is too long!"); re.Str[STRINGLENGTH-1]='\0'; } re.type=STRING; break; case '=': GETCHAR(); if (c=='=') { re.type=EQUAL; GETCHAR(); } else re.type=EVALUE; break; case '<': // 处理 <= 和 < GETCHAR(); if (c=='=') { re.type=NLESSTHAN; GETCHAR(); } else re.type=LESSTHAN; break; case '>': // 处理 >= 和 > GETCHAR(); if (c=='=') { re.type=NMORETHAN; GETCHAR(); } else 
re.type=MORETHAN; break; case '!': // 处理 != GETCHAR(); if (c=='=') { re.type=UNEQL; GETCHAR(); } else { Error(lineNo,"Illegal words in this line"); GETCHAR(); } break; case '+': re.type=PLUS; GETCHAR(); break; case '-': re.type=MINUS; GETCHAR(); break; case '*': re.type=MUL; GETCHAR(); break; case '/': re.type=DIV; GETCHAR(); break; case '(': re.type=LEFTP; GETCHAR(); break; case ')': re.type=RIGHTP; GETCHAR(); break; case '{': re.type=LBRACKET; GETCHAR(); break; case '}': re.type=RBRACKET; GETCHAR(); break; case ',': re.type=COMMA; GETCHAR(); break; case ';': re.type=SEMICOLON; GETCHAR(); break; case ':': re.type=COLON; GETCHAR(); break; case -1: re.type=-1; GETCHAR(); break; default: Error(lineNo,"Your character is beyond the grammar"); GETCHAR(); } if(re.type == IDENTF)//标识符不区分大小写 { for(int x=0;re.Str[x]!='\0';x++) { if(re.Str[x]>='a'&&re.Str[x]<='z') re.Str[x]-='a'-'A'; } } return re; }