void VarTokenizer::advance (void) { DE_ASSERT(m_token != TOKEN_END); m_tokenStart += m_tokenLen; m_token = TOKEN_LAST; m_tokenLen = 1; if (m_str[m_tokenStart] == '[') m_token = TOKEN_LEFT_BRACKET; else if (m_str[m_tokenStart] == ']') m_token = TOKEN_RIGHT_BRACKET; else if (m_str[m_tokenStart] == 0) m_token = TOKEN_END; else if (m_str[m_tokenStart] == '.') m_token = TOKEN_PERIOD; else if (isNum(m_str[m_tokenStart])) { m_token = TOKEN_NUMBER; while (isNum(m_str[m_tokenStart+m_tokenLen])) m_tokenLen += 1; } else if (isIdentifierChar(m_str[m_tokenStart])) { m_token = TOKEN_IDENTIFIER; while (isIdentifierChar(m_str[m_tokenStart+m_tokenLen])) m_tokenLen += 1; } else TCU_FAIL("Unexpected character"); }
static void findFalconTags (void) { vString *name = vStringNew (); const unsigned char *line; while ((line = fileReadLine ()) != NULL) { const unsigned char *cp = line; if (*cp == '#') continue; if (strncmp ((const char*) cp, "function", (size_t) 8) == 0) { cp += 8; cp = skipSpace (cp); while (isIdentifierChar ((int) *cp)) { vStringPut (name, (int) *cp); ++cp; } vStringTerminate (name); makeSimpleTag (name, FalconKinds, K_FUNCTION); vStringClear (name); } else if (strncmp ((const char*) cp, "class", (size_t) 5) == 0) { cp += 5; cp = skipSpace (cp); while (isIdentifierChar ((int) *cp)) { vStringPut (name, (int) *cp); ++cp; } vStringTerminate (name); makeSimpleTag (name, FalconKinds, K_CLASS); vStringClear (name); } else if (strncmp ((const char*) cp, "load", (size_t) 4) == 0) { cp += 4; cp = skipSpace (cp); while (isIdentifierChar ((int) *cp)) { vStringPut (name, (int) *cp); ++cp; } vStringTerminate (name); makeSimpleTag (name, FalconKinds, K_NAMESPACE); vStringClear (name); } } vStringDelete (name); }
// Decide whether the current token ends at currChar.
//
// The token ends when any of the following holds, tested in this order:
//   - nextChar is whitespace,
//   - nextChar or currChar is a single-character token,
//   - exactly one of currChar / nextChar is an identifier character
//     (i.e. we are crossing an identifier boundary),
//   - the end of the file has been reached.
bool Toker::isTokEnd()
{
  if (isspace(nextChar))
    return true;

  if (isSingleCharTok(nextChar) || isSingleCharTok(currChar))
    return true;

  // An identifier/non-identifier transition in either direction ends the token.
  const bool currIsIdent = isIdentifierChar(currChar);
  const bool nextIsIdent = isIdentifierChar(nextChar);
  if (currIsIdent != nextIsIdent)
    return true;

  return isFileEnd() ? true : false;
}
/*
 * Rewrite an identifier in place into a normalized spelling:
 *   - the first alphabetic character is converted to upper case;
 *   - every character rejected by isIdentifierChar() becomes '_';
 *   - every other character is converted to lower case.
 *
 * idstr must be a writable, NUL-terminated string; its length is unchanged.
 *
 * The <ctype.h> functions are only defined for arguments representable as
 * unsigned char (or EOF), so each byte is converted to unsigned char before
 * being classified or case-mapped.  Passing a plain (possibly negative)
 * char is undefined behaviour for bytes >= 0x80 on signed-char platforms.
 */
void
NormalizeIdentifier(char *idstr)
{
    char *ptr = idstr;
    int needCap = 1;

    while (*ptr) {
        const unsigned char uc = (unsigned char)*ptr;

        if (needCap && isalpha(uc)) {
            /* first letter seen: capitalize it, and only it */
            *ptr = (char)toupper(uc);
            needCap = 0;
        } else if (!isIdentifierChar(*ptr)) {
            *ptr = '_';
        } else {
            *ptr = (char)tolower(uc);
        }
        ptr++;
    }
}
/* * "canonicalize" an identifier, to make sure it * does not conflict with any C reserved words * also checks for spaces and other illegal characters within the name */ bool Is_C_Reserved(const char *name) { Symbol *s; const char *ptr; s = FindSymbol(&ckeywords, name); if (s && !strcmp(name, s->name)) return true; if (strlen(name) < 3) return false; for (ptr = name; *ptr; ptr++) { if (!isIdentifierChar(*ptr)) return true; } if (ptr[-2] == '_' && ptr[-1] == 't') return true; return false; }
/*
 * parse an identifier
 *
 * Reads identifier characters from L (the first character is fetched with
 * lexgetc) and classifies the resulting name.
 *
 *   L       - the lexer stream to read from
 *   ast_ptr - receives the AST node created for the token
 *   prefix  - if non-NULL, prepended to the name followed by '.' (when
 *             gl_gas_dat is set) or ':' (otherwise)
 *
 * Returns the token code: T_INSTR / T_INSTRMODIFIER for names found in
 * pasmWords while InDatBlock(L) is true, the symbol's integer value for
 * SYM_RESERVED words, T_HWREG for hardware registers, and T_IDENTIFIER
 * otherwise (including builtins and constants).
 *
 * Ownership of the name buffer: it is handed to the AST node on the
 * T_IDENTIFIER path and freed on every other return path.  It is freed only
 * once the symbol type has been recognised, so that the "Internal error"
 * fall-through paths keep working with a live buffer instead of a
 * dangling pointer.
 */
static int
parseIdentifier(LexStream *L, AST **ast_ptr, const char *prefix)
{
    int c;
    struct flexbuf fb;
    Symbol *sym;
    AST *ast = NULL;
    int startColumn = L->colCounter - 1;
    char *idstr;

    flexbuf_init(&fb, INCSTR);
    if (prefix) {
        flexbuf_addmem(&fb, prefix, strlen(prefix));
        if (gl_gas_dat) {
            flexbuf_addchar(&fb, '.');
        } else {
            flexbuf_addchar(&fb, ':');
        }
    }
    c = lexgetc(L);
    while (isIdentifierChar(c)) {
        //flexbuf_addchar(&fb, tolower(c));
        flexbuf_addchar(&fb, c);
        c = lexgetc(L);
    }
    // add a trailing 0, and make sure there is room for an extra
    // character in case the name mangling needs it
    flexbuf_addchar(&fb, '\0');
    flexbuf_addchar(&fb, '\0');
    idstr = flexbuf_get(&fb);
    /* the character that ended the identifier belongs to the next token */
    lexungetc(L, c);

    /* check for reserved words */
    if (InDatBlock(L)) {
        sym = FindSymbol(&pasmWords, idstr);
        if (sym) {
            if (sym->type == SYM_INSTR) {
                free(idstr);
                ast = NewAST(AST_INSTR, NULL, NULL);
                ast->d.ptr = sym->val;
                *ast_ptr = ast;
                return T_INSTR;
            }
            if (sym->type == SYM_INSTRMODIFIER) {
                free(idstr);
                ast = NewAST(AST_INSTRMODIFIER, NULL, NULL);
                ast->d.ptr = sym->val;
                *ast_ptr = ast;
                return T_INSTRMODIFIER;
            }
            /* unknown type: idstr is still needed by the lookups below */
            fprintf(stderr, "Internal error: Unknown pasm symbol type %d\n", sym->type);
        }
    }
    sym = FindSymbol(&reservedWords, idstr);
    if (sym != NULL) {
        if (sym->type == SYM_BUILTIN) {
            /* run any parse hooks */
            Builtin *b = (Builtin *)sym->val;
            if (b && b->parsehook) {
                (*b->parsehook)(b);
            }
            goto is_identifier;
        }
        if (sym->type == SYM_CONSTANT
            || sym->type == SYM_FLOAT_CONSTANT)
        {
            goto is_identifier;
        }
        if (sym->type == SYM_RESERVED) {
            free(idstr);
            c = INTVAL(sym);
            /* check for special handling */
            switch(c) {
            case T_PUB:
            case T_PRI:
            case T_DAT:
            case T_OBJ:
            case T_VAR:
            case T_CON:
                L->in_block = c;
                L->block_firstline = L->lineCounter;
                //EstablishIndent(L, 1);
                break;
            case T_ASM:
                if (L->in_block == T_ASM) {
                    fprintf(stderr, "WARNING: ignoring nested asm\n");
                } else {
                    /* remember the enclosing block so T_ENDASM can restore it */
                    L->save_block = L->in_block;
                }
                L->in_block = c;
                break;
            case T_ENDASM:
                L->in_block = L->save_block;
                break;
            case T_IF:
            case T_IFNOT:
            case T_ELSE:
            case T_ELSEIF:
            case T_ELSEIFNOT:
            case T_REPEAT:
            case T_CASE:
                EstablishIndent(L, startColumn);
                break;
            default:
                break;
            }
            if (!ast)
                ast = GetComments();
            *ast_ptr = ast;
            return c;
        }
        if (sym->type == SYM_HWREG) {
            free(idstr);
            ast = NewAST(AST_HWREG, NULL, NULL);
            ast->d.ptr = sym->val;
            *ast_ptr = ast;
            return T_HWREG;
        }
        /* unknown type: fall through and treat the name as an identifier */
        fprintf(stderr, "Internal error: Unknown symbol type %d\n", sym->type);
    }

is_identifier:
    ast = NewAST(AST_IDENTIFIER, NULL, NULL);
    /* make sure identifiers do not conflict with C keywords */
    if (gl_normalizeIdents || Is_C_Reserved(idstr)) {
        NormalizeIdentifier(idstr);
    }
    /* the AST node takes ownership of idstr */
    ast->d.string = idstr;
    *ast_ptr = ast;
    return T_IDENTIFIER;
}
//----------------------------------------------------------------------
// Function:	nextToken()
//
// Description:	Read the next token from the input into "token".
//		Leading white space and '#' comment lines are skipped.
//		At end of input the token is LEX_EOF_SYM. Operators and
//		punctuation are recognised by their first character;
//		strings are delegated to consumeString() and
//		consumeBlockString(); "@..." words are looked up with
//		searchForKeyword(); a run of identifier characters is
//		a function name if "(" follows immediately, otherwise an
//		identifier. Anything else yields LEX_UNKNOWN_SYM.
//----------------------------------------------------------------------

void
LexBase::nextToken(LexToken &token)
{
	StringBuffer			spelling;
	bool					found;
	short					funcType;
	short					symbol;

	//--------
	// Skip leading white space
	//--------
	while (m_ch.isSpace()) {
		nextChar();
	}

	//--------
	// Check for EOF.
	//--------
	if (m_atEOF) {
		if (m_sourceType == Configuration::INPUT_STRING) {
			token.reset(LEX_EOF_SYM, m_lineNum, "<end of string>");
		} else {
			token.reset(LEX_EOF_SYM, m_lineNum, "<end of file>");
		}
		return;
	}

	//--------
	// Note the line number at the start of the token
	//--------
	const int lineNum = m_lineNum;

	//--------
	// Miscellaneous kinds of tokens.
	//--------
	switch (m_ch.c_str()[0]) {
	case '?':
		nextChar();
		if (m_ch == '=') {
			nextChar();
			token.reset(LEX_QUESTION_EQUALS_SYM, lineNum, "?=");
		} else {
			//--------
			// Report what was actually read, in the same way as
			// the '&' and '|' cases, rather than an empty spelling.
			//--------
			spelling << '?' << m_ch.c_str();
			token.reset(LEX_UNKNOWN_SYM, lineNum, spelling.c_str());
		}
		return;
	case '!':
		nextChar();
		if (m_ch == '=') {
			nextChar();
			token.reset(LEX_NOT_EQUALS_SYM, lineNum, "!=");
		} else {
			token.reset(LEX_NOT_SYM, lineNum, "!");
		}
		return;
	case '@':
		spelling.append(m_ch.c_str());
		nextChar();
		while (!m_atEOF && isKeywordChar(m_ch)) {
			spelling.append(m_ch.c_str());
			nextChar();
		}
		searchForKeyword(spelling.c_str(), found, symbol);
		if (found) {
			token.reset(symbol, lineNum, spelling.c_str());
		} else {
			token.reset(LEX_UNKNOWN_SYM, lineNum, spelling.c_str());
		}
		return;
	case '+':
		nextChar();
		token.reset(LEX_PLUS_SYM, lineNum, "+");
		return;
	case '&':
		nextChar();
		if (m_ch == '&') {
			nextChar();
			token.reset(LEX_AND_SYM, lineNum, "&&");
		} else {
			spelling << '&' << m_ch.c_str();
			token.reset(LEX_UNKNOWN_SYM, lineNum, spelling.c_str());
		}
		return;
	case '|':
		nextChar();
		if (m_ch == '|') {
			nextChar();
			token.reset(LEX_OR_SYM, lineNum, "||");
		} else {
			spelling << '|' << m_ch.c_str();
			token.reset(LEX_UNKNOWN_SYM, lineNum, spelling.c_str());
		}
		return;
	case '=':
		nextChar();
		if (m_ch == '=') {
			nextChar();
			token.reset(LEX_EQUALS_EQUALS_SYM, lineNum, "==");
		} else {
			token.reset(LEX_EQUALS_SYM, lineNum, "=");
		}
		return;
	case ';':
		nextChar();
		token.reset(LEX_SEMICOLON_SYM, lineNum, ";");
		return;
	case '[':
		nextChar();
		token.reset(LEX_OPEN_BRACKET_SYM, lineNum, "[");
		return;
	case ']':
		nextChar();
		token.reset(LEX_CLOSE_BRACKET_SYM, lineNum, "]");
		return;
	case '{':
		nextChar();
		token.reset(LEX_OPEN_BRACE_SYM, lineNum, "{");
		return;
	case '}':
		nextChar();
		token.reset(LEX_CLOSE_BRACE_SYM, lineNum, "}");
		return;
	case '(':
		nextChar();
		token.reset(LEX_OPEN_PAREN_SYM, lineNum, "(");
		return;
	case ')':
		nextChar();
		token.reset(LEX_CLOSE_PAREN_SYM, lineNum, ")");
		return;
	case ',':
		nextChar();
		token.reset(LEX_COMMA_SYM, lineNum, ",");
		return;
	case '"':
		consumeString(token);
		return;
	case '<':
		nextChar();
		if (m_ch != '%') {
			token.reset(LEX_UNKNOWN_SYM, lineNum, "<");
			return;
		}
		nextChar(); // skip over '%'
		consumeBlockString(token);
		return;
	case '#':
		//--------
		// A comment. Consume it and immediately following
		// comments (without resorting to recursion).
		//--------
		while (m_ch == '#') {
			//--------
			// Skip to the end of line
			//--------
			while (!m_atEOF && m_ch != '\n') {
				nextChar();
			}
			if (m_ch == '\n') {
				nextChar();
			}
			//--------
			// Skip leading white space on the next line
			//--------
			while (m_ch.isSpace()) {
				nextChar();
			}
			//--------
			// Potentially loop around again to consume
			// more comment lines that follow immediately.
			//--------
		}
		//--------
		// Now use (a guaranteed single level of) recursion
		// to obtain the next (non-comment) token.
		//--------
		nextToken(token);
		return;
	}

	//--------
	// Is it a function or identifier?
	//--------
	if (isIdentifierChar(m_ch)) {
		//--------
		// Consume all the identifier characters
		// but not an immediately following "(", if any
		//--------
		spelling.append(m_ch.c_str());
		nextChar();
		while (!m_atEOF && isIdentifierChar(m_ch)) {
			spelling.append(m_ch.c_str());
			nextChar();
		}

		//--------
		// If "(" follows immediately then it is (supposed to be)
		// a function.
		//--------
		if (m_ch == '(') {
			spelling.append(m_ch.c_str());
			nextChar();
			searchForFunction(spelling.c_str(), found, funcType, symbol);
			if (found) {
				token.reset(symbol, lineNum, spelling.c_str(), funcType);
			} else {
				token.reset(LEX_UNKNOWN_FUNC_SYM, lineNum, spelling.c_str());
			}
			return;
		}

		//--------
		// It's not a function so it looks like an identifier.
		// Better check it's a legal identifier.
		//--------
		if (strcmp(spelling.c_str(), ".") == 0) {
			token.reset(LEX_SOLE_DOT_IDENT_SYM, lineNum, spelling.c_str());
		} else if (strstr(spelling.c_str(), "..") != 0) {
			token.reset(LEX_TWO_DOTS_IDENT_SYM, lineNum, spelling.c_str());
		} else {
			try {
				m_uidIdentifierProcessor->expand(spelling);
				token.resetWithOwnership(LEX_IDENT_SYM, lineNum, spelling);
			} catch (const ConfigurationException &) {
				token.resetWithOwnership(LEX_ILLEGAL_IDENT_SYM, lineNum,
										 spelling);
			}
		}
		return;
	}

	//--------
	// None of the above
	//--------
	spelling << m_ch.c_str();
	nextChar();
	token.reset(LEX_UNKNOWN_SYM, lineNum, spelling.c_str());
}
static void findFalconTags (void) { vString *name = vStringNew (); const unsigned char *line; while ((line = readLineFromInputFile ()) != NULL) { const unsigned char *cp = line; // Skip lines starting with # which in falcon // would only be the "crunch bang" statement if (*cp == '#') continue; if (strncmp ((const char*) cp, "function", (size_t) 8) == 0) { cp += 8; cp = skipSpace (cp); while (isIdentifierChar ((int) *cp)) { vStringPut (name, (int) *cp); ++cp; } makeSimpleTag (name, FalconKinds, K_FUNCTION); vStringClear (name); } else if (strncmp ((const char*) cp, "class", (size_t) 5) == 0) { cp += 5; cp = skipSpace (cp); while (isIdentifierChar ((int) *cp)) { vStringPut (name, (int) *cp); ++cp; } makeSimpleTag (name, FalconKinds, K_CLASS); vStringClear (name); } else if (strncmp ((const char*) cp, "load", (size_t) 4) == 0) { cp += 4; cp = skipSpace (cp); while (isIdentifierChar ((int) *cp)) { vStringPut (name, (int) *cp); ++cp; } makeSimpleTag (name, FalconKinds, K_NAMESPACE); vStringClear (name); } else if (strncmp ((const char*) cp, "import from", (size_t) 11) == 0) { cp += 12; cp = skipSpace (cp); while (isIdentifierChar ((int) *cp)) { vStringPut (name, (int) *cp); ++cp; } makeSimpleTag (name, FalconKinds, K_NAMESPACE); vStringClear (name); } } vStringDelete (name); }