void startLex() { char *result; char red; char *number; int iString; while((red = fgetc(myFile)) != EOF) { if(red == ' ' || red == '\n' || red == '\t') continue; if(isComment(red) == 1) { remove(); continue; } else returnItAll(); iString = isString(red); if(iString) { if(iString == 2) { iUnrec++; printf("\nUnrecognized lexeme: %s",word); recognized(word,"UNIDENTIFIED"); continue; } iRec++; recognized(word, "STRING_LITERAL"); continue; } else returnItAll(); if(red == '\''); else if(isTerminated(red) == 1) { iRec++; red = fgetc(myFile); addletter(red); char sec = fgetc(myFile); if(sec == '=') { addletter(sec); if(red == '<') recognized(word,"LESSEQL"); if(red == '>') recognized(word,"GRTREQL"); if(red == '!') recognized(word,"NOTEQL"); continue; } else if(red == '<') recognized(word,"GRTRTH"); else if(red == '>') recognized(word,"LESSTH"); else if(red == '!') recognized(word,"NOT"); else if(red == '+') recognized(word,"ADD"); else if(red == '-') recognized(word,"SUB"); else if(red == '*') recognized(word,"MUL"); else if(red == '^') recognized(word,"RAISE"); else if(red == '/') recognized(word,"DIV"); else if(red == '%') recognized(word,"MOD"); else if(red == '~') recognized(word,"TRUNC_DIV"); else if(red == '|') recognized(word,"OR"); else if(red == '&') recognized(word,"AND"); else if(red == '=') recognized(word,"EQLTO"); else if(red == '?') recognized(word,"COND_END"); else if(red == ':') recognized(word,"ASSIGN"); else if(red == '{') recognized(word,"B_START"); else if(red == '}') recognized(word,"B_END"); else if(red == ';') recognized(word,"STMNT_END"); else if(red == '(') recognized(word,"P_START"); else if(red == ')') recognized(word,"P_END"); else if(red == ',') recognized(word,"COM"); ungetc(sec,myFile); continue; } if(!(strcmp((number = isNumber(red)),"ERR") == 0)) { iRec++; recognized(word, number); continue; } else returnItAll(); if(!(strcmp((result = isKeyword(red)),"ERR") == 0)) { iRec++; recognized(word, result); continue; } else returnItAll(); 
if(isIdentifier(red)) { iRec++; recognized(word,"ID"); continue; } else returnItAll(); { iUnrec++; while((red = fgetc(myFile)) != EOF) { if(red == EOF) break; if(isTerminated(red)) { if(red == '\''); else { ungetc(red,myFile); break; } } addletter(red); } printf("\nUnrecognized lexeme: %s",word); recognized(word, "UNIDENTIFIED"); } }//while((red = fgetc(myFile)) != EOF); }
/*
 * Reads tokens until the current token is `keyword` or end of file is reached.
 * Returns whether the token the scan stopped on is `keyword`.
 */
static boolean findKeyword (tokenInfo *const token, const keywordId keyword)
{
	boolean found = isKeyword (token, keyword);

	while (! found && ! isType (token, TOKEN_EOF))
	{
		readToken (token);
		found = isKeyword (token, keyword);
	}
	return found;
}
// Skips over a type starting at the current token.
// Returns TRUE when a type was consumed, FALSE when the current token does not
// start one (in which case nothing past the already-skipped prefixes is read).
//
// Type prefixes ("[...]", "*", "chan", "<-", "map[...]", "func(...)") are
// consumed in a loop; each is followed by another type, so the loop simply
// restarts on the token after the prefix.
static boolean skipType (tokenInfo *const token)
{
	for (;;)
	{
		// Type      = TypeName | TypeLit | "(" Type ")" .
		// Also covers multiple function results: "(" Type {"," Type} ")"
		if (isType (token, TOKEN_OPEN_PAREN))
		{
			skipToMatched (token);
			return TRUE;
		}

		// TypeName = QualifiedIdent.
		// QualifiedIdent = [ PackageName "." ] identifier .
		// PackageName    = identifier .
		if (isType (token, TOKEN_IDENTIFIER))
		{
			readToken (token);
			if (isType (token, TOKEN_DOT))
			{
				readToken (token);
				if (isType (token, TOKEN_IDENTIFIER))
					readToken (token);
			}
			return TRUE;
		}

		// StructType    = "struct" "{" { FieldDecl ";" } "}"
		// InterfaceType = "interface" "{" { MethodSpec ";" } "}" .
		if (isKeyword (token, KEYWORD_struct) ||
			isKeyword (token, KEYWORD_interface))
		{
			readToken (token);
			skipToMatched (token);	// the "{ ... }" body
			return TRUE;
		}

		// ArrayType   = "[" ArrayLength "]" ElementType .
		// SliceType   = "[" "]" ElementType .
		// ElementType = Type .
		if (isType (token, TOKEN_OPEN_SQUARE))
		{
			skipToMatched (token);
			continue;
		}

		// PointerType = "*" BaseType .
		// BaseType    = Type .
		// ChannelType = ( "chan" [ "<-" ] | "<-" "chan" ) ElementType .
		if (isType (token, TOKEN_STAR) ||
			isKeyword (token, KEYWORD_chan) ||
			isType (token, TOKEN_LEFT_ARROW))
		{
			readToken (token);
			continue;
		}

		// MapType      = "map" "[" KeyType "]" ElementType .
		// KeyType      = Type .
		// FunctionType = "func" Signature .
		// Signature    = Parameters [ Result ] .
		// Result       = Parameters | Type .
		// Parameters   = "(" [ ParameterList [ "," ] ] ")" .
		//
		// Both are a keyword followed by one bracketed group ("[...]" for map,
		// "(...)" for func) and then a type.  For func the result may be
		// parameters, a type, or nothing: the next pass treats anything in
		// parentheses as a type and returns FALSE without consuming anything
		// if no type follows.
		if (isKeyword (token, KEYWORD_map) || isKeyword (token, KEYWORD_func))
		{
			readToken (token);
			skipToMatched (token);
			continue;
		}

		return FALSE;
	}
}
void MacroParenthesesPPCallbacks::argument(const Token &MacroNameTok, const MacroInfo *MI) { for (auto TI = MI->tokens_begin(), TE = MI->tokens_end(); TI != TE; ++TI) { // First token. if (TI == MI->tokens_begin()) continue; // Last token. if ((TI + 1) == MI->tokens_end()) continue; const Token &Prev = *(TI - 1); const Token &Next = *(TI + 1); const Token &Tok = *TI; // Only interested in identifiers. if (!Tok.isOneOf(tok::identifier, tok::raw_identifier)) continue; // Only interested in macro arguments. if (MI->getArgumentNum(Tok.getIdentifierInfo()) < 0) continue; // Argument is surrounded with parentheses/squares/braces/commas. if (isSurroundedLeft(Prev) && isSurroundedRight(Next)) continue; // Don't warn after hash/hashhash or before hashhash. if (Prev.isOneOf(tok::hash, tok::hashhash) || Next.is(tok::hashhash)) continue; // Argument is a struct member. if (Prev.isOneOf(tok::period, tok::arrow, tok::coloncolon)) continue; // Argument is a namespace or class. if (Next.is(tok::coloncolon)) continue; // String concatenation. if (isStringLiteral(Prev.getKind()) || isStringLiteral(Next.getKind())) continue; // Type/Var. if (isAnyIdentifier(Prev.getKind()) || isKeyword(Prev) || isAnyIdentifier(Next.getKind()) || isKeyword(Next)) continue; // Initialization. if (Next.is(tok::l_paren)) continue; // Cast. if (Prev.is(tok::l_paren) && Next.is(tok::star) && TI + 2 != MI->tokens_end() && (TI + 2)->is(tok::r_paren)) continue; // Assignment/return, i.e. '=x;' or 'return x;'. if (Prev.isOneOf(tok::equal, tok::kw_return) && Next.is(tok::semi)) continue; // C++ template parameters. if (PP->getLangOpts().CPlusPlus && Prev.isOneOf(tok::comma, tok::less) && Next.isOneOf(tok::comma, tok::greater)) continue; Check->diag(Tok.getLocation(), "macro argument should be enclosed in " "parentheses") << FixItHint::CreateInsertion(Tok.getLocation(), "(") << FixItHint::CreateInsertion(Tok.getLocation().getLocWithOffset( PP->getSpelling(Tok).length()), ")"); } }
/*
 * Reads tokens until the current token is `keyword`.
 * There is no end-of-input test here; the loop ends only when the keyword is
 * seen (or when readToken leaves the function by other means).
 */
static void findKeyword (tokenInfo *const token, const keywordId keyword)
{
	for (;;)
	{
		if (isKeyword (token, keyword))
			break;
		readToken (token);
	}
}
/*
 * Reads the next token from the input into `token`.
 *
 * Blanks, tabs, newlines and both comment forms are skipped.  The token's
 * line number and file position are recorded, its type (and keyword, for
 * identifiers) is set, and LastTokenType is updated so that a later '/' can
 * be classified as a division operator or the start of a regular expression.
 * End of input is reported by longjmp'ing to Exception with ExceptionEOF.
 */
static void readToken (tokenInfo *const token)
{
	int c;

	token->type    = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	/* Skip leading whitespace. */
	c = fileGetc ();
	while (c == ' ' || c == '\t' || c == '\n')
		c = fileGetc ();

	token->lineNumber   = getSourceLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (c)
	{
		case EOF:
			longjmp (Exception, (int)ExceptionEOF);
			break;

		/* Single-character punctuation. */
		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case ';': token->type = TOKEN_SEMICOLON;    break;
		case ',': token->type = TOKEN_COMMA;        break;
		case '.': token->type = TOKEN_PERIOD;       break;
		case ':': token->type = TOKEN_COLON;        break;
		case '{': token->type = TOKEN_OPEN_CURLY;   break;
		case '}': token->type = TOKEN_CLOSE_CURLY;  break;
		case '=': token->type = TOKEN_EQUAL_SIGN;   break;
		case '[': token->type = TOKEN_OPEN_SQUARE;  break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			/* Re-stamp the position after the string has been consumed. */
			token->lineNumber   = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '\\':
			/* Keep the following character only if it is a backslash, a
			 * double quote or whitespace; otherwise push it back. */
			c = fileGetc ();
			if (! (c == '\\' || c == '"' || isspace (c)))
				fileUngetc (c);
			token->type = TOKEN_CHARACTER;
			token->lineNumber   = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '/':
		{
			const int d = fileGetc ();

			if (d == '*')
			{
				/* Block comment: look for the closing star-slash pair. */
				for (;;)
				{
					fileSkipToCharacter ('*');
					c = fileGetc ();
					if (c == '/')
						break;
					fileUngetc (c);
					if (c == EOF || c == '\0')
						break;
				}
				goto getNextChar;
			}

			if (d == '/')
			{
				/* Line comment: discard through the end of the line. */
				fileSkipToCharacter ('\n');
				goto getNextChar;
			}

			/* Not a comment: a division operator or a regular expression,
			 * decided by what came before. */
			fileUngetc (d);
			switch (LastTokenType)
			{
				case TOKEN_CHARACTER:
				case TOKEN_KEYWORD:
				case TOKEN_IDENTIFIER:
				case TOKEN_STRING:
				case TOKEN_CLOSE_CURLY:
				case TOKEN_CLOSE_PAREN:
				case TOKEN_CLOSE_SQUARE:
					token->type = TOKEN_FORWARD_SLASH;
					break;

				default:
					token->type = TOKEN_REGEXP;
					parseRegExp ();
					token->lineNumber   = getSourceLineNumber ();
					token->filePosition = getInputFilePosition ();
					break;
			}
			break;
		}

		default:
			if (isIdentChar (c))
			{
				parseIdentifier (token->string, c);
				token->lineNumber   = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string, Lang_js);
				token->type = isKeyword (token, KEYWORD_NONE)
					? TOKEN_IDENTIFIER
					: TOKEN_KEYWORD;
			}
			else
				token->type = TOKEN_UNDEFINED;
			break;
	}

	LastTokenType = token->type;
}
/*
 * Parses the remainder of an if / else / else-if clause.
 *
 * Accepted shapes:
 *
 *     if ( ... )
 *         one line;
 *
 *     if ( ... )
 *         statement;
 *     else
 *         statement
 *
 *     if ( ... ) {
 *         multiple;
 *         statements;
 *     }
 *
 * A single statement may legally lack its terminator when the enclosing
 * function closes right after it:
 *
 *     function a(flag){
 *         if(flag)
 *             test(1);
 *         else
 *             test(2)
 *     }
 *
 * TODO: Deal with statements that can optionally end without a semi-colon.
 *       Currently this messes up the parsing of blocks.  Need to somehow
 *       detect this has happened, and either back up a token, or skip
 *       reading the next token if that is possible from all code locations.
 *
 * Returns TRUE when the caller should read a further token, FALSE when the
 * statement ended without its own semicolon.
 */
static boolean parseIf (tokenInfo *const token)
{
	readToken (token);

	/* "else if": consume the "if" that follows the "else". */
	if (isKeyword (token, KEYWORD_if))
		readToken (token);

	/* Skip the parenthesised condition. */
	if (isType (token, TOKEN_OPEN_PAREN))
		skipArgumentList (token);

	if (! isType (token, TOKEN_OPEN_CURLY))
	{
		/* Single statement: only read on if it had its own terminator. */
		findCmdTerm (token);
		return isType (token, TOKEN_SEMICOLON);
	}

	/* Braced body. */
	parseBlock (token, token);
	return TRUE;
}
/*
 * Advances `token` past the remainder of a feature.
 *
 * An optional "is" is consumed first.  If the token that follows opens a
 * routine body (deferred, do, external, local, obsolete, once, require) the
 * tokens are read while tracking nesting until the matching "end" or end of
 * file.  Otherwise the feature is treated as a possible manifest constant and
 * at most two tokens are consumed.
 */
static void findFeatureEnd (tokenInfo *const token)
{
	const boolean sawIs = isKeyword (token, KEYWORD_is);
	boolean hasBody = FALSE;
	int depth;

	if (sawIs)
		readToken (token);

	switch (token->keyword)
	{
		case KEYWORD_deferred:
		case KEYWORD_do:
		case KEYWORD_external:
		case KEYWORD_local:
		case KEYWORD_obsolete:
		case KEYWORD_once:
		case KEYWORD_require:
			hasBody = TRUE;
			break;

		default:
			break;
	}

	if (! hasBody)
	{
		/* is this a manifest constant? */
		if (sawIs || isType (token, TOKEN_OPERATOR))
		{
			if (isType (token, TOKEN_OPERATOR))
				readToken (token);
			readToken (token);
		}
		return;
	}

	/* One token is read per iteration, after that token has been examined. */
	for (depth = 1 ; depth > 0 && ! isType (token, TOKEN_EOF) ; readToken (token))
	{
#ifdef TYPE_REFERENCE_TOOL
		if (isType (token, TOKEN_OPEN_BRACE))
		{
			readToken (token);
			if (isType (token, TOKEN_IDENTIFIER))
				parseType (token);
		}
		else if (isType (token, TOKEN_BANG))
		{
			readToken (token);
			if (isType (token, TOKEN_IDENTIFIER))
				parseType (token);
			if (isType (token, TOKEN_BANG))
				readToken (token);
		}
		else
#endif
		switch (token->keyword)
		{
			/* Constructs that are closed by their own "end". */
			case KEYWORD_check:
			case KEYWORD_debug:
			case KEYWORD_from:
			case KEYWORD_if:
			case KEYWORD_inspect:
				++depth;
				break;

			case KEYWORD_local:
				parseLocal (token);
				break;

			case KEYWORD_end:
				--depth;
				break;

			default:
				break;
		}
	}
}
string colourHTMLCode(MavenCompiler* c, string input) { string r = "<pre style=\"border: dashed 2px #CCCCCC; padding: 3px; background-color: #EEEEEE\">"; char ch; int tabSize = 4; // the number of spaces in a tab string keyword = ""; for(int i = 0; i < input.length(); ++i) { ch = input[i]; // comments if(ch == '/') { // single line if(input[i + 1] == '/') { r += "<font color=\"#009900\">"; for(; i < input.length(); ++i) { if(input[i] == '\n') break; r += input[i]; } r += "</font>\n"; continue; } // multi line if(input[i + 1] == '*') { r += "<font color=\"#009900\">"; for(; i < input.length(); ++i) { if(input[i] == '*' && input[i + 1] == '/') break; r += input[i]; } ++i; r += "*/</font>"; continue; } } // strings if(ch == '"') { r += "<font color=\"#FF0000\">\""; ++i; for(; i < input.length(); ++i) { ch = input[i]; if(ch == '"') break; r += ch; } r += "\"</font>"; continue; } if(isspace(ch)) { if(isKeyword(c, keyword)) { r += "<font color=\"#0000CC\">" + keyword + "</font>"; } else r += keyword; keyword = ""; r += ch; } else keyword += ch; } return r + "</pre>"; }
/*
 * Reads the next token from the input into `token`.
 *
 * token            receives the type, keyword, text, line number and file
 *                  position of the token read.
 * include_newlines when TRUE, a '\r' or '\n' is not skipped as whitespace;
 *                  depending on LastTokenType it is either dropped or
 *                  reported as TOKEN_SEMICOLON (see the newline case below).
 * repr             when non-NULL, a textual representation of what was read
 *                  is appended to it; any run of skipped whitespace is
 *                  represented by a single space.
 *
 * Comments and a "#!" line on line 1 are skipped.  LastTokenType is updated
 * on exit.
 */
static void readTokenFull (tokenInfo *const token, boolean include_newlines, vString *const repr)
{
	int c;
	int i;	/* number of characters read by the whitespace loop, including the one kept */

	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	i = 0;
	do
	{
		c = fileGetc ();
		i++;
	}
	while (c == '\t' || c == ' ' ||
		   ((c == '\r' || c == '\n') && ! include_newlines));

	token->lineNumber = getSourceLineNumber ();
	token->filePosition = getInputFilePosition ();

	if (repr)
	{
		/* i > 1 means at least one whitespace character was skipped */
		if (i > 1)
			vStringPut (repr, ' ');
		vStringPut (repr, c);
	}

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF; break;
		case '(': token->type = TOKEN_OPEN_PAREN; break;
		case ')': token->type = TOKEN_CLOSE_PAREN; break;
		case ';': token->type = TOKEN_SEMICOLON; break;
		case ',': token->type = TOKEN_COMMA; break;
		case '.': token->type = TOKEN_PERIOD; break;
		case ':': token->type = TOKEN_COLON; break;
		case '{': token->type = TOKEN_OPEN_CURLY; break;
		case '}': token->type = TOKEN_CLOSE_CURLY; break;
		case '=': token->type = TOKEN_EQUAL_SIGN; break;
		case '[': token->type = TOKEN_OPEN_SQUARE; break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;

		case '+':
		case '-':
		{
			int d = fileGetc ();
			if (d == c) /* ++ or -- */
				token->type = TOKEN_POSTFIX_OPERATOR;
			else
			{
				fileUngetc (d);
				token->type = TOKEN_BINARY_OPERATOR;
			}
			break;
		}

		case '*':
		case '%':
		case '?':
		case '>':
		case '<':
		case '^':
		case '|':
		case '&':
			token->type = TOKEN_BINARY_OPERATOR;
			break;

		case '\r':
		case '\n':
			/* This isn't strictly correct per the standard, but following the
			 * real rules means understanding all statements, and that's not
			 * what the parser currently does.  What we do here is a guess, by
			 * avoiding inserting semicolons that would make the statement on
			 * the left invalid.  Hopefully this should not have false negatives
			 * (e.g. should not miss insertion of a semicolon) but might have
			 * false positives (e.g. it will wrongfully emit a semicolon for the
			 * newline in "foo\n+bar").
			 * This should however be mostly harmless as we only deal with
			 * newlines in specific situations where we know a false positive
			 * wouldn't hurt too bad. */
			switch (LastTokenType)
			{
				/* these cannot be the end of a statement, so hold the newline */
				case TOKEN_EQUAL_SIGN:
				case TOKEN_COLON:
				case TOKEN_PERIOD:
				case TOKEN_FORWARD_SLASH:
				case TOKEN_BINARY_OPERATOR:
				/* and these already end one, no need to duplicate it */
				case TOKEN_SEMICOLON:
				case TOKEN_COMMA:
				case TOKEN_CLOSE_CURLY:
				case TOKEN_OPEN_CURLY:
					include_newlines = FALSE; /* no need to recheck */
					goto getNextChar;
					break;
				default:
					/* the newline is reported as a statement terminator */
					token->type = TOKEN_SEMICOLON;
			}
			break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				/* the opening quote is already in repr; add contents and closing quote */
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '\\':
			/* the character after the backslash is pushed back unless it is a
			 * backslash, a double quote or whitespace */
			c = fileGetc ();
			if (c != '\\' && c != '"' && !isspace (c))
				fileUngetc (c);
			token->type = TOKEN_CHARACTER;
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '/':
		{
			int d = fileGetc ();
			if ( (d != '*') &&		/* is this the start of a comment? */
				 (d != '/') )		/* is a one line comment? */
			{
				fileUngetc (d);
				/* the previous token decides between division and a regular
				 * expression */
				switch (LastTokenType)
				{
					case TOKEN_CHARACTER:
					case TOKEN_IDENTIFIER:
					case TOKEN_STRING:
					case TOKEN_CLOSE_CURLY:
					case TOKEN_CLOSE_PAREN:
					case TOKEN_CLOSE_SQUARE:
						token->type = TOKEN_FORWARD_SLASH;
						break;

					default:
						token->type = TOKEN_REGEXP;
						parseRegExp ();
						token->lineNumber = getSourceLineNumber ();
						token->filePosition = getInputFilePosition ();
						break;
				}
			}
			else
			{
				if (repr) /* remove the / we added */
					repr->buffer[--repr->length] = 0;
				if (d == '*')
				{
					/* block comment: search for a '*' directly followed by '/',
					 * giving up at end of file or a NUL character */
					do
					{
						skipToCharacter ('*');
						c = fileGetc ();
						if (c == '/')
							break;
						else
							fileUngetc (c);
					} while (c != EOF && c != '\0');
					goto getNextChar;
				}
				else if (d == '/')	/* is this the start of a comment? */
				{
					skipToCharacter ('\n');
					/* if we care about newlines, put it back so it is seen */
					if (include_newlines)
						fileUngetc ('\n');
					goto getNextChar;
				}
			}
			break;
		}

		case '#':
			/* skip shebang in case of e.g. Node.js scripts */
			if (token->lineNumber > 1)
				token->type = TOKEN_UNDEFINED;
			else if ((c = fileGetc ()) != '!')
			{
				fileUngetc (c);
				token->type = TOKEN_UNDEFINED;
			}
			else
			{
				skipToCharacter ('\n');
				goto getNextChar;
			}
			break;

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
				/* the first character is already in repr; append the rest */
				if (repr && vStringLength (token->string) > 1)
					vStringCatS (repr, vStringValue (token->string) + 1);
			}
			break;
	}

	LastTokenType = token->type;
}
// just for hex num, char, char * style string, float or double number bool isLiteral(const char *str) { size_t len; int i; if(isKeyword(str)) return false; len = strlen(str); if(len == 3 && str[0] == '\'' && str[2] == '\'') return true; if(len >= 3 && str[0] == '\"' && str[len - 1] == '\"') return true; for(i = 0; i < len; ++i) { if(isnumber(str[i])) continue; if(i == len - 1 && (str[i] == 'u' || str[i] == 'U' || str[i] == 'l' || str[i] == 'L')) return true; if(i == len - 2) { if((str[i] == 'u' || str[i] == 'U') && (str[i + 1] == 'l' || str[i + 1] == 'L')) return true; if((str[i + 1] == 'u' || str[i + 1] == 'U') && (str[i] == 'l' || str[i] == 'L')) return true; } if(str[i] == '.') { int j = i + 1; for(; j < len; ++j) { if(isnumber(str[j])) continue; break; } if(j == len - 1) { if(str[j] == 'f' || str[j] == 'F') return true; } if(j == len) return true; return false; } if(str[i] == 'e' || str[i] == 'E') { int j = i + 1; for(; j < len; ++j) { if(isnumber(str[j])) continue; return false; } return true; } return false; } return true; }
/// True when `next` is a keyword (per isKeyword) or punctuation (per
/// isPuntaction). The punctuation test is only made when the keyword test fails.
inline bool isKwordOrPunc( const String& next )
{
	if ( isKeyword( next ) )
		return true;
	return isPuntaction( next );
}
/// Returns `word` with a trailing underscore appended when isKeyword(word)
/// holds, and `word` unchanged otherwise.
std::string CppKeyword::escapeKeyword(const std::string& word)
{
    std::string escaped(word);
    if (isKeyword(word))
        escaped += "_";
    return escaped;
}
int LexicalAnalyzer::lex() { lexenum = ""; while (charClass == SPACE) getChar(); if (charClass == ERROR) { addChar(); getChar(); return ERROR; } if (charClass == STOP) { return STOP; } switch (charClass) { case LETTER: addChar(); getChar(); while (charClass == LETTER || charClass == DIGIT) { addChar(); getChar(); } return isKeyword(lexenum) ? KEYWORD : ID; break; case DIGIT: addChar(); getChar(); isFloat = false; if(currentChar == '.') { addChar(); isFloat = true; getChar(); if(currentChar == 'E') { addChar(); isFloat = true; getChar(); if(currentChar == '+' || currentChar == '-') { addChar(); getChar(); } } } if(currentChar == 'E') { addChar(); isFloat = true; getChar(); if(currentChar == '+' || currentChar == '-') { addChar(); getChar(); } } while (charClass == DIGIT) { addChar(); getChar(); if(currentChar == '.') { addChar(); isFloat = true; getChar(); } if(currentChar == 'E') { addChar(); isFloat = true; getChar(); if(currentChar == '+' || currentChar == '-') { addChar(); getChar(); } } } if(isFloat) { isFloat = false; return FLOAT; } else { return INT; } break; case SYMBOL: if(currentChar == '<') { addChar(); getChar(); if(currentChar == '=') { addChar(); getChar(); } } else { if(currentChar == '>') { addChar(); getChar(); if(currentChar == '=') { addChar(); getChar(); } } else { if (currentChar == '=') { addChar(); getChar(); if(currentChar == '=') { addChar(); getChar(); } } else { if(currentChar == '!') { addChar(); getChar(); if(currentChar == '=') { addChar(); getChar(); } } else { addChar(); getChar(); } } } } return SYMBOL; break; default: break; } return 0; }
/*
 * Reads the next token from the input into `token`.
 *
 * token            receives the type, keyword, text, line number and file
 *                  position of the token read.
 * include_newlines when TRUE, a line break seen in the whitespace before the
 *                  token may cause a TOKEN_SEMICOLON to be returned in its
 *                  place; the real token is then kept in NextToken and
 *                  returned by the following call.
 * repr             when non-NULL, a textual representation of what was read
 *                  is appended to it; any run of skipped whitespace is
 *                  represented by a single space.
 *
 * Comments and a "#!" line on line 1 are skipped.  LastTokenType is updated
 * on exit, except when a held-back token is returned.
 */
static void readTokenFull (tokenInfo *const token, boolean include_newlines, vString *const repr)
{
	int c;
	int i;	/* number of characters read by the whitespace loop, including the one kept */
	boolean newline_encountered = FALSE;

	/* if we've got a token held back, emit it */
	if (NextToken)
	{
		copyToken (token, NextToken, FALSE);
		deleteToken (NextToken);
		NextToken = NULL;
		return;
	}

	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	i = 0;
	do
	{
		c = fileGetc ();
		/* line breaks are always skipped here; only remember that one was seen */
		if (include_newlines && (c == '\r' || c == '\n'))
			newline_encountered = TRUE;
		i++;
	}
	while (c == '\t' || c == ' ' || c == '\r' || c == '\n');

	token->lineNumber = getSourceLineNumber ();
	token->filePosition = getInputFilePosition ();

	if (repr)
	{
		/* i > 1 means at least one whitespace character was skipped */
		if (i > 1)
			vStringPut (repr, ' ');
		vStringPut (repr, c);
	}

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF; break;
		case '(': token->type = TOKEN_OPEN_PAREN; break;
		case ')': token->type = TOKEN_CLOSE_PAREN; break;
		case ';': token->type = TOKEN_SEMICOLON; break;
		case ',': token->type = TOKEN_COMMA; break;
		case '.': token->type = TOKEN_PERIOD; break;
		case ':': token->type = TOKEN_COLON; break;
		case '{': token->type = TOKEN_OPEN_CURLY; break;
		case '}': token->type = TOKEN_CLOSE_CURLY; break;
		case '=': token->type = TOKEN_EQUAL_SIGN; break;
		case '[': token->type = TOKEN_OPEN_SQUARE; break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;

		case '+':
		case '-':
		{
			int d = fileGetc ();
			if (d == c) /* ++ or -- */
				token->type = TOKEN_POSTFIX_OPERATOR;
			else
			{
				fileUngetc (d);
				token->type = TOKEN_BINARY_OPERATOR;
			}
			break;
		}

		case '*':
		case '%':
		case '?':
		case '>':
		case '<':
		case '^':
		case '|':
		case '&':
			token->type = TOKEN_BINARY_OPERATOR;
			break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				/* the opening quote is already in repr; add contents and closing quote */
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '`':
			token->type = TOKEN_TEMPLATE_STRING;
			parseTemplateString (token->string);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				/* the opening backquote is already in repr; add contents and closing one */
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '\\':
			/* the character after the backslash is pushed back unless it is a
			 * backslash, a double quote or whitespace */
			c = fileGetc ();
			if (c != '\\' && c != '"' && !isspace (c))
				fileUngetc (c);
			token->type = TOKEN_CHARACTER;
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '/':
		{
			int d = fileGetc ();
			if ( (d != '*') &&		/* is this the start of a comment? */
				 (d != '/') )		/* is a one line comment? */
			{
				fileUngetc (d);
				/* the previous token decides between a binary operator and a
				 * regular expression */
				switch (LastTokenType)
				{
					case TOKEN_CHARACTER:
					case TOKEN_IDENTIFIER:
					case TOKEN_STRING:
					case TOKEN_TEMPLATE_STRING:
					case TOKEN_CLOSE_CURLY:
					case TOKEN_CLOSE_PAREN:
					case TOKEN_CLOSE_SQUARE:
						token->type = TOKEN_BINARY_OPERATOR;
						break;

					default:
						token->type = TOKEN_REGEXP;
						parseRegExp ();
						token->lineNumber = getSourceLineNumber ();
						token->filePosition = getInputFilePosition ();
						break;
				}
			}
			else
			{
				if (repr) /* remove the / we added */
					repr->buffer[--repr->length] = 0;
				if (d == '*')
				{
					/* block comment: search for a '*' directly followed by '/',
					 * giving up at end of file or a NUL character */
					do
					{
						fileSkipToCharacter ('*');
						c = fileGetc ();
						if (c == '/')
							break;
						else
							fileUngetc (c);
					} while (c != EOF && c != '\0');
					goto getNextChar;
				}
				else if (d == '/')	/* is this the start of a comment? */
				{
					fileSkipToCharacter ('\n');
					/* if we care about newlines, put it back so it is seen */
					if (include_newlines)
						fileUngetc ('\n');
					goto getNextChar;
				}
			}
			break;
		}

		case '#':
			/* skip shebang in case of e.g. Node.js scripts */
			if (token->lineNumber > 1)
				token->type = TOKEN_UNDEFINED;
			else if ((c = fileGetc ()) != '!')
			{
				fileUngetc (c);
				token->type = TOKEN_UNDEFINED;
			}
			else
			{
				fileSkipToCharacter ('\n');
				goto getNextChar;
			}
			break;

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string, Lang_js);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
				/* the first character is already in repr; append the rest */
				if (repr && vStringLength (token->string) > 1)
					vStringCatS (repr, vStringValue (token->string) + 1);
			}
			break;
	}

	if (include_newlines && newline_encountered)
	{
		/* This isn't strictly correct per the standard, but following the
		 * real rules means understanding all statements, and that's not
		 * what the parser currently does.  What we do here is a guess, by
		 * avoiding inserting semicolons that would make the statement on
		 * the left or right obviously invalid.  Hopefully this should not
		 * have false negatives (e.g. should not miss insertion of a semicolon)
		 * but might have false positives (e.g. it will wrongfully emit a
		 * semicolon sometimes, i.e. for the newline in "foo\n(bar)").
		 * This should however be mostly harmless as we only deal with
		 * newlines in specific situations where we know a false positive
		 * wouldn't hurt too bad. */

		/* these already end a statement, so no need to duplicate it */
		#define IS_STMT_SEPARATOR(t) ((t) == TOKEN_SEMICOLON    || \
		                              (t) == TOKEN_EOF          || \
		                              (t) == TOKEN_COMMA        || \
		                              (t) == TOKEN_CLOSE_CURLY  || \
		                              (t) == TOKEN_OPEN_CURLY)
		/* these cannot be the start or end of a statement */
		#define IS_BINARY_OPERATOR(t) ((t) == TOKEN_EQUAL_SIGN      || \
		                               (t) == TOKEN_COLON           || \
		                               (t) == TOKEN_PERIOD          || \
		                               (t) == TOKEN_BINARY_OPERATOR)

		if (! IS_STMT_SEPARATOR(LastTokenType) &&
		    ! IS_STMT_SEPARATOR(token->type) &&
		    ! IS_BINARY_OPERATOR(LastTokenType) &&
		    ! IS_BINARY_OPERATOR(token->type) &&
		    /* these cannot be followed by a semicolon */
		    ! (LastTokenType == TOKEN_OPEN_PAREN ||
		       LastTokenType == TOKEN_OPEN_SQUARE))
		{
			/* hold the token... */
			Assert (NextToken == NULL);
			NextToken = newToken ();
			copyToken (NextToken, token, FALSE);

			/* ...and emit a semicolon instead */
			token->type = TOKEN_SEMICOLON;
			token->keyword = KEYWORD_NONE;
			vStringClear (token->string);
			/* note: the newline is stored in the token's own string, not in repr */
			if (repr)
				vStringPut (token->string, '\n');
		}

		#undef IS_STMT_SEPARATOR
		#undef IS_BINARY_OPERATOR
	}

	LastTokenType = token->type;
}
// concept/role constructors; return BAD_LEX in case of error LispToken TsScanner :: getExpressionKeyword ( void ) const { if ( isKeyword ("and") ) return L_AND; if ( isKeyword ("or") ) return L_OR; if ( isKeyword ("not") ) return L_NOT; if ( isKeyword ("inv") || isKeyword ("inverse") ) return L_INV; if ( isKeyword ("compose") ) return L_RCOMPOSITION; if ( isKeyword ("project_into") ) return L_PROJINTO; if ( isKeyword ("project_from") ) return L_PROJFROM; if ( isKeyword ("some") ) return L_EXISTS; if ( isKeyword ("all") ) return L_FORALL; if ( isKeyword("min") || isKeyword("at-least") || isKeyword("atleast") ) return L_GE; if ( isKeyword("max") || isKeyword("at-most") || isKeyword("atmost") ) return L_LE; if ( isKeyword ("one-of") ) return ONEOF; if ( isKeyword ("self-ref") ) return REFLEXIVE; if ( isKeyword ("string") ) return STRING; if ( isKeyword ("number") ) return NUMBER; if ( isKeyword ("real") ) return REAL; if ( isKeyword ("bool") ) return BOOL; if ( isKeyword("gt") ) return DTGT; if ( isKeyword("lt") ) return DTLT; if ( isKeyword("ge") ) return DTGE; if ( isKeyword("le") ) return DTLE; if ( isKeyword("d-one-of") ) return DONEOF; // not a keyword -- error return BAD_LEX; }
/// Throws a ParseException ("Unnecessary token") carrying the current
/// statement number and token unless isKeyword("") holds.
/// Presumably isKeyword("") is true only when no token is left on the
/// statement -- confirm against isKeyword's definition.
void Parser::checkRedundantToken()
{
    const bool nothingLeft = isKeyword("");
    if (nothingLeft)
        return;

    throw ParseException(_stmtNum, _token, "Unnecessary token");
}
// recognize FaCT++ keywords; return BAD_LEX if not found LispToken TsScanner :: getCommandKeyword ( void ) const { // definitions if ( isKeyword ("defprimconcept") ) return PCONCEPT; if ( isKeyword ("defconcept") ) return CONCEPT; if ( isKeyword ("defprimrole") ) return PROLE; if ( isKeyword ("defdatarole") ) return DATAROLE; if ( isKeyword ("defprimattribute") ) return PATTR; if ( isKeyword ("defindividual") ) return DEFINDIVIDUAL; // general relations if ( isKeyword ("implies") || isKeyword ("implies_c") ) return SUBSUMES; if ( isKeyword ("equal_c") ) return EQUAL_C; if ( isKeyword ("disjoint") || isKeyword ("disjoint_c") ) return DISJOINT; if ( isKeyword ("implies_r") ) return IMPLIES_R; if ( isKeyword ("equal_r") ) return EQUAL_R; if ( isKeyword ("disjoint_r") ) return DISJOINT_R; if ( isKeyword ("inverse") ) return INVERSE; // role stuff if ( isKeyword ("functional") ) return FUNCTIONAL; if ( isKeyword ("transitive") ) return TRANSITIVE; if ( isKeyword ("reflexive") ) return REFLEXIVE; if ( isKeyword ("irreflexive") ) return IRREFLEXIVE; if ( isKeyword ("symmetric") ) return SYMMETRIC; if ( isKeyword ("asymmetric") ) return ASYMMETRIC; if ( isKeyword ("range") ) return ROLERANGE; if ( isKeyword ("domain") ) return ROLEDOMAIN; // individual stuff if ( isKeyword ("instance") ) return INSTANCE; if ( isKeyword ("related") ) return RELATED; if ( isKeyword ("same") ) return SAME; if ( isKeyword ("different") ) return DIFFERENT; if ( isKeyword ("fairness") ) return FAIRNESS; // not a keyword -- error return BAD_LEX; }
/*
 * Parses a loop statement.  `token` must be on the loop keyword; for any
 * other token nothing is read.
 *
 * Handled forms:
 *
 *     for (x=0; x<3; x++)
 *         document.write("This text is repeated three times<br>");
 *
 *     for (x=0; x<3; x++)
 *     {
 *         document.write("This text is repeated three times<br>");
 *     }
 *
 *     while (number<5){
 *         document.write(number+"<br>");
 *         number++;
 *     }
 *
 *     do{
 *         document.write(number+"<br>");
 *         number++;
 *     }
 *     while (number<5);
 */
static void parseLoop (tokenInfo *const token)
{
	const boolean head_first =
		(boolean) (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while));

	if (! head_first && ! isKeyword (token, KEYWORD_do))
		return;

	readToken (token);

	/* for/while: the parenthesised head comes before the body. */
	if (head_first && isType (token, TOKEN_OPEN_PAREN))
		skipArgumentList (token);

	/* Loop body: a braced block or a single statement. */
	if (isType (token, TOKEN_OPEN_CURLY))
		parseBlock (token, token);
	else
		parseLine (token, FALSE);

	if (head_first)
		return;

	/* do ... while: the condition follows the body. */
	readToken (token);
	if (isKeyword (token, KEYWORD_while))
	{
		readToken (token);
		if (isType (token, TOKEN_OPEN_PAREN))
			skipArgumentList (token);
	}
}
/*
 * Reads the next token from the input into `token`.
 *
 * Blanks, tabs and newlines are skipped, as are "--" line comments, block
 * comments, and lines introduced by the `rem` keyword.  The token's type is
 * set, plus its text and keyword for identifiers and its text for strings.
 * End of input is reported by longjmp'ing to Exception with ExceptionEOF.
 *
 * Fix: the block-comment skip loop now also stops at EOF.  It used to test
 * only for '\0', so an unterminated comment at the end of the input kept
 * re-reading EOF and never returned.
 */
static void readToken (tokenInfo *const token)
{
	int c;

	token->type    = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
		c = fileGetc ();
	while (c == '\t' || c == ' ' || c == '\n');

	switch (c)
	{
		case EOF: longjmp (Exception, (int)ExceptionEOF);	break;
		case '(': token->type = TOKEN_OPEN_PAREN;			break;
		case ')': token->type = TOKEN_CLOSE_PAREN;			break;
		case ';': token->type = TOKEN_SEMICOLON;			break;
		case ',': token->type = TOKEN_COMMA;				break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			break;

		case '-':
			c = fileGetc ();
			if (c == '-')		/* is this the start of a comment? */
			{
				skipToCharacter ('\n');
				goto getNextChar;
			}
			else
			{
				/* a whitespace character after the operator is dropped;
				 * anything else belongs to the next token */
				if (!isspace (c))
					fileUngetc (c);
				token->type = TOKEN_OPERATOR;
			}
			break;

		case '/':
		{
			int d = fileGetc ();
			if (d != '*')		/* is this the start of a comment? */
				fileUngetc (d);	/* no: token stays TOKEN_UNDEFINED */
			else
			{
				/* search for a '*' directly followed by '/'; give up at end
				 * of file or a NUL so the loop cannot spin forever */
				do
				{
					skipToCharacter ('*');
					c = fileGetc ();
					if (c == '/')
						break;
					else
						fileUngetc (c);
				} while (c != EOF && c != '\0');
				goto getNextChar;
			}
			break;
		}

		default:
			if (! isIdentChar1 (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->keyword = analyzeToken (token->string);
				if (isKeyword (token, KEYWORD_rem))
				{
					/* "rem" introduces a comment running to end of line */
					vStringClear (token->string);
					skipToCharacter ('\n');
					goto getNextChar;
				}
				else if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			break;
	}
}
/*
 * Parses the statements of a brace-delimited block.
 *
 * token  - current token; may be the opening curly (it is then skipped) or
 *          the first token inside the block.
 * parent - token whose string is appended to the scope while `this`, `var`
 *          and `function` statements are processed.
 *
 * Returns TRUE if a statement starting with `this` was found directly in
 * the block, FALSE otherwise. token->nestLevel is incremented for the
 * duration of the call.
 */
static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent)
{
	boolean foundThis = FALSE;
	boolean advance = TRUE;
	vString *const scopeBackup = vStringNew ();

	token->nestLevel++;

	/* Be forgiving: if called on the opening curly, step past it. */
	if (isType (token, TOKEN_OPEN_CURLY)  &&  isKeyword (token, KEYWORD_NONE))
		readToken (token);

	if (! isType (token, TOKEN_CLOSE_CURLY))
	{
		/* Consume statements until the closing brace of this block. */
		for (;;)
		{
			const boolean atThis = isKeyword (token, KEYWORD_this);

			advance = TRUE;

			if (atThis  ||  isKeyword (token, KEYWORD_var))
			{
				/*
				 * `this.` marks the enclosing construct as a class;
				 * `var` may introduce an inner function. Both are
				 * parsed with the parent's name added to the scope.
				 */
				if (atThis)
					foundThis = TRUE;
				vStringCopy (scopeBackup, token->scope);
				addToScope (token, parent->string);
				parseLine (token, foundThis);
				vStringCopy (token->scope, scopeBackup);
			}
			else if (isKeyword (token, KEYWORD_function))
			{
				vStringCopy (scopeBackup, token->scope);
				addToScope (token, parent->string);
				parseFunction (token);
				vStringCopy (token->scope, scopeBackup);
			}
			else if (isType (token, TOKEN_OPEN_CURLY))
			{
				/* Nested block; its result is intentionally ignored. */
				parseBlock (token, parent);
			}
			else
			{
				/*
				 * A statement may lack a terminator when the next line
				 * is the closing brace; parseLine reports whether a
				 * further token must be read.
				 */
				advance = parseLine (token, foundThis);
			}

			if (advance)
				readToken (token);

			/*
			 * An unterminated statement also ends the block, otherwise
			 * the nesting would be off by one.
			 */
			if (isType (token, TOKEN_CLOSE_CURLY)  ||  ! advance)
				break;
		}
	}

	vStringDelete (scopeBackup);
	token->nestLevel--;

	return foundThis;
}
/*
 * Sets the two random number generator seeds Rands1 and Rands2.
 *
 * Accepted argument forms (at most 3 arguments):
 *   - two separate REAL arguments, each an integer >= 0
 *   - a single REAL vector of length 2
 * either optionally followed by a T/F value, which may carry the keyword
 * "quiet". A true value suppresses verbosity; any other keyword is an
 * error.
 *
 * Seeds must be non-missing, non-negative whole numbers with
 * seed1 <= RANDM1 and seed2 <= RANDM2. If either seed is 0,
 * randomSeed(verbose) is called instead of storing the values.
 *
 * Returns NULLSYMBOL on success; on error, reports the message through
 * putErrorOUTSTR() and returns 0.
 */
Symbolhandle setseed(Symbolhandle list)
{
	Symbolhandle    symhf, symhl, symhKey;
	/* Initialised so no error path leaves them indeterminate. */
	double          first = 0.0, last = 0.0;
	long            nargs = NARGS(list);
	long            margs = nargs;	/* original count, before a quiet flag is dropped */
	long            verbose = 1;
	char           *keyword;

	OUTSTR[0] = '\0';

	if (nargs > 3)
	{
		badNargs(FUNCNAME, -3);
		goto errorExit;
	}

	if (nargs > 1)
	{
		/* check for quiet:T or simply logical scalar last argument */
		symhKey = COMPVALUE(list,nargs-1);
		if (!argOK(symhKey, 0, nargs))
		{
			goto errorExit;
		}
		if ((keyword = isKeyword(symhKey)) && strcmp(keyword, "quiet") != 0)
		{
			badKeyword(FUNCNAME, keyword);
			goto errorExit;
		}
		if (isTorF(symhKey))
		{
			verbose = (DATAVALUE(symhKey,0) == 0);
			nargs--;	/* the remaining arguments are the seeds */
		}
		else if(keyword || nargs == 3)
		{
			notTorF((keyword) ? keyword : "argument 3");
			goto errorExit;
		}
	} /*if (nargs > 1)*/

	symhf = COMPVALUE(list,0);
	if(!argOK(symhf, REAL, (margs > 1) ? 1 : 0))
	{
		goto errorExit;
	}

	if(nargs == 2)
	{
		/* seeds given as two separate arguments */
		if(!argOK(symhl = COMPVALUE(list,1), REAL,2))
		{
			goto errorExit;
		}
		if (!isInteger(symhf, NONNEGATIVEVALUE) ||
			!isInteger(symhl, NONNEGATIVEVALUE))
		{
			sprintf(OUTSTR,
					"ERROR: when seeds are separate arguments, both must be integers >= 0");
		}
		else
		{
			first = DATAVALUE(symhf,0);
			last = DATAVALUE(symhl,0);
		}
	} /*if(nargs == 2)*/
	else
	{
		/* seeds given as one vector of length 2 */
		if(!isVector(symhf) || symbolSize(symhf) != 2)
		{
			sprintf(OUTSTR,
					"ERROR: a single seed argument to %s must be a vector of length 2",
					FUNCNAME);
		}
		else
		{
			first = DATAVALUE(symhf,0);
			last = DATAVALUE(symhf,1);
		}
	} /*if(nargs == 2){}else{}*/

	if(*OUTSTR)
	{
		goto errorExit;
	}

	if(isMissing(first) || isMissing(last))
	{
		sprintf(OUTSTR,"ERROR: missing values not allowed as seeds");
	}
	else if(first != floor(first) || first < 0 ||
			last != floor(last) || last < 0 ||
			first > RANDM1 || last > RANDM2)
	{
		sprintf(OUTSTR,
				"ERROR: seeds must be non-negative integers, seed1 <= %ld and seed2 <= %ld",
				RANDM1, RANDM2);
	}

	if(*OUTSTR)
	{
		goto errorExit;
	}

	if(first == 0 || last == 0)
	{
		/* a zero seed requests generated seeds */
		randomSeed(verbose);
	}
	else
	{
		Rands1 = first;
		Rands2 = last;
	}
	return (NULLSYMBOL);

  errorExit:
	putErrorOUTSTR();

	return (0);
} /*setseed()*/
/*
 * Reads the next token from the input into `token`.
 *
 * The token is reset first. Tabs, spaces, newlines and semicolons are
 * skipped, and `--` comments are discarded up to the end of the line.
 * For multi-character operators one character of look-ahead is read; it
 * is pushed back unless it completes the operator or is whitespace.
 *
 * On end of input the function does not return: it longjmp()s to
 * `Exception` with ExceptionEOF.
 */
static void readToken (tokenInfo *const token)
{
	int c;
	int next;	/* one character of look-ahead */

	token->type    = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
		c = fileGetc ();
	while (c == '\t'  ||  c == ' '  ||  c == '\n');

	switch (c)
	{
		case EOF:
			longjmp (Exception, (int)ExceptionEOF);
			break;

		/* semicolons are treated like whitespace */
		case ';':
			goto getNextChar;

		/* single-character tokens */
		case '!': token->type = TOKEN_BANG;          break;
		case '$': token->type = TOKEN_DOLLAR;        break;
		case '(': token->type = TOKEN_OPEN_PAREN;    break;
		case ')': token->type = TOKEN_CLOSE_PAREN;   break;
		case ',': token->type = TOKEN_COMMA;         break;
		case '.': token->type = TOKEN_DOT;           break;
		case '[': token->type = TOKEN_OPEN_BRACKET;  break;
		case ']': token->type = TOKEN_CLOSE_BRACKET; break;
		case '{': token->type = TOKEN_OPEN_BRACE;    break;
		case '}': token->type = TOKEN_CLOSE_BRACE;   break;
		case '~': token->type = TOKEN_TILDE;         break;

		case '+':
		case '*':
		case '^':
		case '=':
			token->type = TOKEN_OPERATOR;
			break;

		case '-':
			next = fileGetc ();
			if (next == '>')
				token->type = TOKEN_CONSTRAINT;
			else if (next == '-')	/* start of a comment */
			{
				skipToCharacter ('\n');
				goto getNextChar;
			}
			else
			{
				if (!isspace (next))
					fileUngetc (next);
				token->type = TOKEN_OPERATOR;
			}
			break;

		case '?':
		case ':':
			/*
			 * "?=" and ":=" are operators. Anything else is reported
			 * as TOKEN_COLON, including a lone '?'.
			 */
			next = fileGetc ();
			if (next == '=')
				token->type = TOKEN_OPERATOR;
			else
			{
				token->type = TOKEN_COLON;
				if (!isspace (next))
					fileUngetc (next);
			}
			break;

		case '<':
		case '>':
			/* the operator may continue with '=' or '>' */
			next = fileGetc ();
			if (!(next == '='  ||  next == '>'  ||  isspace (next)))
				fileUngetc (next);
			token->type = TOKEN_OPERATOR;
			break;

		case '/':
			/* "//" and "/=" */
			next = fileGetc ();
			if (!(next == '/'  ||  next == '='  ||  isspace (next)))
				fileUngetc (next);
			token->type = TOKEN_OPERATOR;
			break;

		case '\\':
			/* "\\" (two backslashes) */
			next = fileGetc ();
			if (!(next == '\\'  ||  isspace (next)))
				fileUngetc (next);
			token->type = TOKEN_OPERATOR;
			break;

		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string);
			break;

		case '\'':
			token->type = TOKEN_CHARACTER;
			parseCharacter ();
			break;

		default:
			if (isalpha (c))
			{
				parseIdentifier (token->string, c);
				token->keyword = analyzeToken (token->string);
				token->type = isKeyword (token, KEYWORD_NONE)
					? TOKEN_IDENTIFIER : TOKEN_KEYWORD;
			}
			else if (isdigit (c))
			{
				/*
				 * NOTE(review): the vString returned by parseNumeric is
				 * not released here; confirm who owns it.
				 */
				vStringCat (token->string, parseNumeric (c));
				token->type = TOKEN_NUMERIC;
			}
			else if (isFreeOperatorChar (c))
			{
				parseFreeOperator (token->string, c);
				token->type = TOKEN_OPERATOR;
			}
			else
			{
				/* Unrecognised character: this assertion always fails. */
				token->type = TOKEN_UNDEFINED;
				Assert (! isType (token, TOKEN_UNDEFINED));
			}
			break;
	}
}
/*
 * Returns the current random number seeds as a length-2 SCRATCH symbol
 * holding Rands1 and Rands2.
 *
 * At most one argument is accepted. If present it must be a T/F value,
 * optionally with keyword "quiet"; a true value suppresses the
 * "Seeds are ..." message that is otherwise printed.
 *
 * Returns a null Symbolhandle on any error.
 */
Symbolhandle getseed(Symbolhandle list)
{
	Symbolhandle    seeds;
	Symbolhandle    quietArg;
	char           *keyword;
	long            verbose = 1;
	long            nargs = NARGS(list);

	if (nargs > 1)
	{
		badNargs(FUNCNAME, -1);
		return ((Symbolhandle) 0);
	}

	quietArg = COMPVALUE(list,0);
	if (quietArg != (Symbolhandle) 0)
	{
		if (!argOK(quietArg, 0, 0))
		{
			return ((Symbolhandle) 0);
		}

		keyword = isKeyword(quietArg);
		if (keyword && strcmp(keyword, "quiet") != 0)
		{
			badKeyword(FUNCNAME, keyword);
			return ((Symbolhandle) 0);
		}

		if (!isTorF(quietArg))
		{
			notTorF("argument");
			return ((Symbolhandle) 0);
		}
		/* quiet:T turns the report off */
		verbose = (DATAVALUE(quietArg,0) == 0);
	}

	if (verbose)
	{
		sprintf(OUTSTR, "Seeds are %ld and %ld", Rands1, Rands2);
		putOUTSTR();
	}

	seeds = RInstall(SCRATCH, 2);
	if (seeds == (Symbolhandle) 0)
	{
		return ((Symbolhandle) 0);
	}

#ifdef INVISIBLESYMBOLS
	/* the seeds were already printed above, so rename the result */
	if (verbose)
	{
		setNAME(seeds, INVISSCRATCH);
	}
#endif /*INVISIBLESYMBOLS*/

	DATAVALUE(seeds,0) = (double) Rands1;
	DATAVALUE(seeds,1) = (double) Rands2;

	return (seeds);
} /*getseed()*/
/*
 * Reads the next token from the input file into `token`.
 *
 * The token is reset first. Tabs, spaces and newlines are skipped, and
 * `--` comments are discarded up to the end of the line. End of input is
 * reported as TOKEN_EOF. For multi-character operators one character of
 * look-ahead is read; it is pushed back unless it completes the operator
 * or is whitespace.
 */
static void readToken (tokenInfo *const token)
{
	int c;
	int next;	/* one character of look-ahead */

	token->type    = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
		c = getcFromInputFile ();
	while (c == '\t'  ||  c == ' '  ||  c == '\n');

	switch (c)
	{
		/* end of input and single-character tokens */
		case EOF: token->type = TOKEN_EOF;           break;
		case '!': token->type = TOKEN_BANG;          break;
		case '$': token->type = TOKEN_DOLLAR;        break;
		case '(': token->type = TOKEN_OPEN_PAREN;    break;
		case ')': token->type = TOKEN_CLOSE_PAREN;   break;
		case ',': token->type = TOKEN_COMMA;         break;
		case '.': token->type = TOKEN_DOT;           break;
		case ';': token->type = TOKEN_SEMICOLON;     break;
		case '[': token->type = TOKEN_OPEN_BRACKET;  break;
		case ']': token->type = TOKEN_CLOSE_BRACKET; break;
		case '{': token->type = TOKEN_OPEN_BRACE;    break;
		case '}': token->type = TOKEN_CLOSE_BRACE;   break;
		case '~': token->type = TOKEN_TILDE;         break;

		case '+':
		case '*':
		case '^':
		case '=':
			token->type = TOKEN_OPERATOR;
			break;

		case '-':
			next = getcFromInputFile ();
			if (next == '>')
				token->type = TOKEN_CONSTRAINT;
			else if (next == '-')	/* start of a comment */
			{
				skipToCharacter ('\n');
				goto getNextChar;
			}
			else
			{
				if (!isspace (next))
					ungetcToInputFile (next);
				token->type = TOKEN_OPERATOR;
			}
			break;

		case '?':
		case ':':
			/* "?=" and ":=" are operators; otherwise report '?' or ':' */
			next = getcFromInputFile ();
			if (next == '=')
				token->type = TOKEN_OPERATOR;
			else
			{
				if (!isspace (next))
					ungetcToInputFile (next);
				token->type = (c == ':') ? TOKEN_COLON : TOKEN_QUESTION;
			}
			break;

		case '<':
		case '>':
			/* the operator may continue with '=' or '>' */
			next = getcFromInputFile ();
			if (!(next == '='  ||  next == '>'  ||  isspace (next)))
				ungetcToInputFile (next);
			token->type = TOKEN_OPERATOR;
			break;

		case '/':
			/* "//" and "/=" */
			next = getcFromInputFile ();
			if (!(next == '/'  ||  next == '='  ||  isspace (next)))
				ungetcToInputFile (next);
			token->type = TOKEN_OPERATOR;
			break;

		case '\\':
			/* "\\" (two backslashes) */
			next = getcFromInputFile ();
			if (!(next == '\\'  ||  isspace (next)))
				ungetcToInputFile (next);
			token->type = TOKEN_OPERATOR;
			break;

		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string);
			break;

		case '\'':
			token->type = TOKEN_CHARACTER;
			parseCharacter ();
			break;

		default:
			if (isalpha (c))
			{
				parseIdentifier (token->string, c);
				token->keyword = analyzeToken (token->string, Lang_eiffel);
				token->type = isKeyword (token, KEYWORD_NONE)
					? TOKEN_IDENTIFIER : TOKEN_KEYWORD;
			}
			else if (isdigit (c))
			{
				/* append the parsed number, then release the temporary */
				vString *const number = parseNumeric (c);
				vStringCat (token->string, number);
				vStringDelete (number);
				token->type = TOKEN_NUMERIC;
			}
			else if (isFreeOperatorChar (c))
			{
				parseFreeOperator (token->string, c);
				token->type = TOKEN_OPERATOR;
			}
			else
				token->type = TOKEN_UNDEFINED;
			break;
	}
}
/// Reports whether `name` denotes a keyword.
///
/// Only symbol and string values are looked up, through the overload that
/// takes the value's token id; every other kind of value yields false.
bool SymbolTable::isKeyword(const Value &name)
{
    const bool hasTokenId =
        name.kind == Value::t_symbol || name.kind == Value::t_string;
    return hasTokenId && isKeyword(name.token_id);
}
/*
 * Reads the next token into `token` and, while `signature` is non-NULL
 * and shorter than MAX_SIGNATURE_LENGTH, appends the token's text to it.
 *
 * A newline that follows an identifier, string, "other" token or a
 * closing paren/curly/square is turned into a semicolon token. The type
 * of the previous token is kept in a function-static variable across
 * calls. Line comments act like a newline; block comments act like a
 * newline if they contain one, otherwise like a space.
 */
static void readToken (tokenInfo *const token)
{
	int c;
	static tokenType lastTokenType = TOKEN_NONE;
	boolean firstWhitespace = TRUE;
	boolean whitespace;

	token->type = TOKEN_NONE;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
	{
		c = fileGetc ();
		token->lineNumber = getSourceLineNumber ();
		token->filePosition = getInputFilePosition ();

		if (c == '\n')
		{
			switch (lastTokenType)
			{
				case TOKEN_IDENTIFIER:
				case TOKEN_STRING:
				case TOKEN_OTHER:
				case TOKEN_CLOSE_PAREN:
				case TOKEN_CLOSE_CURLY:
				case TOKEN_CLOSE_SQUARE:
					c = ';';	/* semicolon injection */
					break;
				default:
					break;
			}
		}

		whitespace = c == '\t'  ||  c == ' ' ||  c == '\r' || c == '\n';

		/* A run of whitespace adds at most one blank to the signature. */
		if (signature && whitespace && firstWhitespace
			&& vStringLength (signature) < MAX_SIGNATURE_LENGTH)
		{
			firstWhitespace = FALSE;
			vStringPut(signature, ' ');
		}
	}
	while (whitespace);

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;          break;
		case ';': token->type = TOKEN_SEMICOLON;    break;
		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case '{': token->type = TOKEN_OPEN_CURLY;   break;
		case '}': token->type = TOKEN_CLOSE_CURLY;  break;
		case '[': token->type = TOKEN_OPEN_SQUARE;  break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;
		case '*': token->type = TOKEN_STAR;         break;
		case '.': token->type = TOKEN_DOT;          break;
		case ',': token->type = TOKEN_COMMA;        break;

		case '/':
		{
			const int next = fileGetc ();

			if (next == '/')
			{
				/*
				 * Line comments run through the next newline and act
				 * like a newline, so a newline is pushed back.
				 */
				fileSkipToCharacter ('\n');
				fileUngetc ('\n');
				goto getNextChar;
			}
			else if (next == '*')
			{
				boolean sawNewline = FALSE;

				for (;;)
				{
					int d;

					/* scan to the next '*' (or EOF) */
					do
					{
						d = fileGetc ();
						if (d == '\n')
							sawNewline = TRUE;
					} while (d != EOF && d != '*');

					c = fileGetc ();
					if (c == '/')
						break;
					fileUngetc (c);
					if (c == EOF || c == '\0')
						break;
				}

				fileUngetc (sawNewline ? '\n' : ' ');
				goto getNextChar;
			}
			else
			{
				token->type = TOKEN_OTHER;
				fileUngetc (next);
			}
			break;
		}

		case '"':
		case '\'':
		case '`':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '<':
		{
			const int next = fileGetc ();

			if (next == '-')
				token->type = TOKEN_LEFT_ARROW;
			else
			{
				fileUngetc (next);
				token->type = TOKEN_OTHER;
			}
			break;
		}

		default:
			if (! isStartIdentChar (c))
				token->type = TOKEN_OTHER;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = lookupKeyword (vStringValue (token->string), Lang_go);
				token->type = isKeyword (token, KEYWORD_NONE)
					? TOKEN_IDENTIFIER : TOKEN_KEYWORD;
			}
			break;
	}

	if (signature && vStringLength (signature) < MAX_SIGNATURE_LENGTH)
	{
		switch (token->type)
		{
			case TOKEN_LEFT_ARROW:
				vStringCatS(signature, "<-");
				break;

			case TOKEN_STRING:
				/*
				 * Only struct member annotations can appear in function
				 * prototypes, so only `` type strings are possible.
				 */
				vStringPut(signature, '`');
				vStringCat(signature, token->string);
				vStringPut(signature, '`');
				break;

			case TOKEN_IDENTIFIER:
			case TOKEN_KEYWORD:
				vStringCat(signature, token->string);
				break;

			default:
				if (c != EOF)
					vStringPut(signature, c);
				break;
		}
	}

	lastTokenType = token->type;
}
/*
 * Reads the next token into `token`.
 *
 * A newline that follows an identifier, string, "other" token or a
 * closing paren/curly/square is reported as TOKEN_SEMICOLON. The type of
 * the previous token is kept in a function-static variable across calls.
 * Line comments act like a newline; block comments act like a newline if
 * they contain one, otherwise like a space.
 */
static void readToken (tokenInfo *const token)
{
	int c;
	static tokenType lastTokenType = TOKEN_NONE;

	token->type = TOKEN_NONE;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
	{
		c = fileGetc ();
		token->lineNumber = getSourceLineNumber ();
		token->filePosition = getInputFilePosition ();

		if (c == '\n')
		{
			switch (lastTokenType)
			{
				case TOKEN_IDENTIFIER:
				case TOKEN_STRING:
				case TOKEN_OTHER:
				case TOKEN_CLOSE_PAREN:
				case TOKEN_CLOSE_CURLY:
				case TOKEN_CLOSE_SQUARE:
					/* the newline terminates the statement */
					token->type = TOKEN_SEMICOLON;
					goto done;
				default:
					break;
			}
		}
	}
	while (c == '\t'  ||  c == ' ' ||  c == '\r' || c == '\n');

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;          break;
		case ';': token->type = TOKEN_SEMICOLON;    break;
		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case '{': token->type = TOKEN_OPEN_CURLY;   break;
		case '}': token->type = TOKEN_CLOSE_CURLY;  break;
		case '[': token->type = TOKEN_OPEN_SQUARE;  break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;
		case '*': token->type = TOKEN_STAR;         break;
		case '.': token->type = TOKEN_DOT;          break;
		case ',': token->type = TOKEN_COMMA;        break;

		case '/':
		{
			const int next = fileGetc ();

			if (next == '/')
			{
				/*
				 * Line comments run through the next newline and act
				 * like a newline, so a newline is pushed back.
				 */
				fileSkipToCharacter ('\n');
				fileUngetc ('\n');
				goto getNextChar;
			}
			else if (next == '*')
			{
				boolean sawNewline = FALSE;

				for (;;)
				{
					int d;

					/* scan to the next '*' (or EOF) */
					do
					{
						d = fileGetc ();
						if (d == '\n')
							sawNewline = TRUE;
					} while (d != EOF && d != '*');

					c = fileGetc ();
					if (c == '/')
						break;
					fileUngetc (c);
					if (c == EOF || c == '\0')
						break;
				}

				fileUngetc (sawNewline ? '\n' : ' ');
				goto getNextChar;
			}
			else
			{
				token->type = TOKEN_OTHER;
				fileUngetc (next);
			}
			break;
		}

		case '"':
		case '\'':
		case '`':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '<':
		{
			const int next = fileGetc ();

			if (next == '-')
				token->type = TOKEN_LEFT_ARROW;
			else
			{
				fileUngetc (next);
				token->type = TOKEN_OTHER;
			}
			break;
		}

		default:
			if (! isStartIdentChar (c))
				token->type = TOKEN_OTHER;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = lookupKeyword (vStringValue (token->string), Lang_go);
				token->type = isKeyword (token, KEYWORD_NONE)
					? TOKEN_IDENTIFIER : TOKEN_KEYWORD;
			}
			break;
	}

done:
	lastTokenType = token->type;
}
/*
 * Parses a const, type or var declaration and emits one tag per declared
 * identifier; `kind` is the tag kind used for plain declarations.
 *
 * ConstDecl      = "const" ( ConstSpec | "(" { ConstSpec ";" } ")" ) .
 * ConstSpec      = IdentifierList [ [ Type ] "=" ExpressionList ] .
 * IdentifierList = identifier { "," identifier } .
 * ExpressionList = Expression { "," Expression } .
 * TypeDecl       = "type" ( TypeSpec | "(" { TypeSpec ";" } ")" ) .
 * TypeSpec       = identifier Type .
 * VarDecl        = "var" ( VarSpec | "(" { VarSpec ";" } ")" ) .
 * VarSpec        = IdentifierList ( Type [ "=" ExpressionList ] | "=" ExpressionList ) .
 *
 * For a type declaration the tag kind becomes GOTAG_STRUCT or
 * GOTAG_INTERFACE when the corresponding keyword follows the name, and
 * struct members are parsed with the type's token as parent.
 */
static void parseConstTypeVar (tokenInfo *const token, goKind kind)
{
	boolean grouped;

	readToken (token);
	grouped = isType (token, TOKEN_OPEN_PAREN);	/* "( spec; spec; ... )" form */
	if (grouped)
		readToken (token);

	do
	{
		tokenInfo *typeToken = NULL;

		/* Walk the identifier list of one spec. */
		for (;;)
		{
			if (isType (token, TOKEN_EOF))
				break;

			if (isType (token, TOKEN_IDENTIFIER))
			{
				if (kind == GOTAG_TYPE)
				{
					goKind tagKind = kind;

					/* Keep the name; the tag kind depends on what follows. */
					typeToken = copyToken (token);
					readToken (token);

					if (isKeyword (token, KEYWORD_struct))
						tagKind = GOTAG_STRUCT;
					else if (isKeyword (token, KEYWORD_interface))
						tagKind = GOTAG_INTERFACE;

					makeTag (typeToken, tagKind, NULL, GOTAG_UNDEFINED, NULL, NULL);
					break;
				}

				makeTag (token, kind, NULL, GOTAG_UNDEFINED, NULL, NULL);
				readToken (token);
			}

			if (!isType (token, TOKEN_COMMA))
				break;

			readToken (token);
		}

		if (typeToken != NULL && isKeyword (token, KEYWORD_struct))
			parseStructMembers (token, typeToken);
		else
			skipType (token);

		if (typeToken != NULL)
			deleteToken (typeToken);

		/* Skip the rest of the spec up to its terminator. */
		while (!isType (token, TOKEN_SEMICOLON) &&
				!isType (token, TOKEN_CLOSE_PAREN) &&
				!isType (token, TOKEN_EOF))
		{
			readToken (token);
			skipToMatched (token);
		}

		if (grouped && !isType (token, TOKEN_CLOSE_PAREN))
		{
			/* we are at TOKEN_SEMICOLON; move on to the next spec */
			readToken (token);
		}
	}
	while (!isType (token, TOKEN_EOF) &&
			grouped && !isType (token, TOKEN_CLOSE_PAREN));
}
/*
 * Reads the next token from `src` into `tkn`.
 *
 * tkn->type starts as ERROR and tkn->value receives the NUL-terminated
 * token text. Whitespace between tokens is skipped, as are `//` comments
 * (to end of line) and slash-star block comments (to the closing
 * star-slash). If the collected text is recognised by isKeyword(), the
 * type is set to KEYWORD.
 *
 * Returns non-zero if scanning stopped before end of file, 0 if EOF was
 * reached. A token cut short by EOF is still stored in `tkn`.
 *
 * The one-character look-ahead lives in function-static storage, so it
 * is shared by every call regardless of which `src` is passed.
 */
int getToken(FILE* src, Token* tkn)
{
	/*
	 * int rather than char: fgetc() returns an int so that EOF is
	 * distinguishable from every byte value, and the <ctype.h>
	 * classifiers require a value in the unsigned char range or EOF.
	 */
	static int c;
	static int lookahead = 0;
	enum {
		S,              /* initial state */
		ID_KW,          /* identifier or keyword */
		DIV_COM,        /* '/' seen: division or start of a comment */
		LINE_COM,       /* inside a line comment */
		BLOCK_COM,      /* inside a block comment */
		BLOCK_COM_STAR, /* inside a block comment, '*' just seen */
		G_GE,           /* greater or greater-or-equal */
		L_LE,           /* less or less-or-equal */
		EQ_ASS,         /* equality or assignment */
		NUM,            /* integer part of a number */
		NUM_R,          /* real number, after the decimal point */
		IN_STR,         /* inside a string */
		NOT_EQ          /* not equal */
	} currentState = S;
	int i = 0, eot = 0;

	tkn->type = ERROR;

	/*
	 * Stop one short of MAX_STRING_LENGTH so the terminating NUL written
	 * below still fits when value holds MAX_STRING_LENGTH characters
	 * (assumed; confirm against the Token declaration).
	 */
	while (!eot && i < MAX_STRING_LENGTH - 1) {
		if (!lookahead)
			c = fgetc(src);
		else
			lookahead = 0;	/* re-use the character saved by the previous token */

		if (c == EOF)
			break;

		switch (currentState) {
		case S:
			if (isalpha(c) || c == '_') {
				currentState = ID_KW;
				tkn->value[i++] = (char) c;
				tkn->type = IDENTIFIER;
			} else if (isspace(c)) {
				currentState = S;
			} else if (isdigit(c)) {
				currentState = NUM;
				tkn->value[i++] = (char) c;
				tkn->type = INTEGER;
			} else {
				/* every punctuation character is stored */
				tkn->value[i++] = (char) c;
				switch (c) {
				case '+': tkn->type = PLUS;        eot = 1; break;
				case '-': tkn->type = MINUS;       eot = 1; break;
				case '*': tkn->type = ASTERIX;     eot = 1; break;
				case ';': tkn->type = SEMICOLON;   eot = 1; break;
				case '.': tkn->type = DOT;         eot = 1; break;
				case ',': tkn->type = COMMA;       eot = 1; break;
				case '(': tkn->type = LEFT_PAR;    eot = 1; break;
				case ')': tkn->type = RIGHT_PAR;   eot = 1; break;
				/* NOTE(review): square brackets share the paren types; confirm this is intended. */
				case '[': tkn->type = LEFT_PAR;    eot = 1; break;
				case ']': tkn->type = RIGHT_PAR;   eot = 1; break;
				case '{': tkn->type = START_BLOCK; eot = 1; break;
				case '}': tkn->type = END_BLOCK;   eot = 1; break;
				case '/': currentState = DIV_COM;  break;
				case '=': currentState = EQ_ASS;   break;
				case '>': currentState = G_GE;     break;
				case '<': currentState = L_LE;     break;
				case '"': currentState = IN_STR;   break;
				case '!':
					currentState = NOT_EQ;
					tkn->type = DIFFERENT;
					break;
				default:
					/* unknown character: returned with type ERROR */
					eot = 1;
					break;
				}
			}
			break;

		case ID_KW:
			if (isalnum(c) || c == '_') {
				tkn->value[i++] = (char) c;
			} else {
				lookahead = 1;
				eot = 1;
			}
			break;

		case DIV_COM:
			if (c == '/') {
				currentState = LINE_COM;
				i--;	/* drop the '/' stored on entry */
			} else if (c == '*') {
				currentState = BLOCK_COM;
				i--;	/* drop the '/' stored on entry */
			} else {
				eot = lookahead = 1;
				tkn->type = SLASH;
			}
			break;

		case LINE_COM:
			/* a line comment ends only at the newline */
			if (c == '\n')
				currentState = S;
			break;

		case BLOCK_COM:
			if (c == '*')
				currentState = BLOCK_COM_STAR;
			break;

		case BLOCK_COM_STAR:
			/* only a '/' directly after a '*' closes the comment */
			if (c == '/')
				currentState = S;
			else if (c != '*')
				currentState = BLOCK_COM;
			break;

		case G_GE:
			if (c == '=') {
				tkn->type = GREATER_OR_EQUAL;
				tkn->value[i++] = (char) c;
				eot = 1;
			} else {
				tkn->type = GREATER;
				eot = lookahead = 1;
			}
			break;

		case L_LE:
			if (c == '=') {
				tkn->type = LESS_OR_EQUAL;
				tkn->value[i++] = (char) c;
				eot = 1;
			} else {
				tkn->type = LESS;
				eot = lookahead = 1;
			}
			break;

		case EQ_ASS:
			if (c == '=') {
				tkn->type = EQUAL;
				tkn->value[i++] = (char) c;
				eot = 1;
			} else {
				tkn->type = ASSIGNMENT;
				eot = lookahead = 1;
			}
			break;

		case NUM:
			if (isdigit(c)) {
				tkn->value[i++] = (char) c;
			} else if (c == '.') {
				currentState = NUM_R;
				tkn->value[i++] = (char) c;
			} else {
				tkn->type = INTEGER;
				eot = lookahead = 1;
			}
			break;

		case NUM_R:
			if (isdigit(c)) {
				tkn->value[i++] = (char) c;
			} else if (c == 'f' || c == 'F') {
				tkn->type = FLOAT;
				tkn->value[i++] = (char) c;
				eot = 1;
			} else if (c == 'l' || c == 'L') {
				tkn->type = DOUBLE;
				tkn->value[i++] = (char) c;
				eot = 1;
			} else {
				tkn->type = DOUBLE;
				eot = lookahead = 1;
			}
			break;

		case IN_STR:
			tkn->value[i++] = (char) c;
			if (c == '"') {
				eot = 1;
				tkn->type = STRING;
			}
			break;

		case NOT_EQ:
			if (c == '=') {
				tkn->value[i++] = (char) c;
				eot = 1;
			} else {
				eot = lookahead = 1;
			}
			break;
		}
	}

	tkn->value[i] = '\0';
	if (isKeyword(tkn->value))
		tkn->type = KEYWORD;

	return (c != EOF);
}