// Moves the tokenizer on to the next token of m_str.
//
// On return m_tokenStart is the index of the new token's first character,
// m_tokenLen is its length and m_token is its kind. Must not be called once
// TOKEN_END has been reached. Fails (TCU_FAIL) on a character that cannot
// start any token; in that case m_token is left as TOKEN_LAST.
void VarTokenizer::advance (void)
{
	DE_ASSERT(m_token != TOKEN_END);

	// Step over the previous token and assume a one-character token until
	// the classification below says otherwise.
	m_tokenStart	+= m_tokenLen;
	m_token			 = TOKEN_LAST;
	m_tokenLen		 = 1;

	switch (m_str[m_tokenStart])
	{
		case '[':
			m_token = TOKEN_LEFT_BRACKET;
			break;

		case ']':
			m_token = TOKEN_RIGHT_BRACKET;
			break;

		case 0:
			// Terminating NUL: nothing left to tokenize.
			m_token = TOKEN_END;
			break;

		case '.':
			m_token = TOKEN_PERIOD;
			break;

		default:
			if (isNum(m_str[m_tokenStart]))
			{
				// Numbers extend over every following digit.
				m_token = TOKEN_NUMBER;
				for (; isNum(m_str[m_tokenStart+m_tokenLen]); m_tokenLen += 1)
				{
				}
			}
			else if (isIdentifierChar(m_str[m_tokenStart]))
			{
				// Identifiers extend over every following identifier character.
				m_token = TOKEN_IDENTIFIER;
				for (; isIdentifierChar(m_str[m_tokenStart+m_tokenLen]); m_tokenLen += 1)
				{
				}
			}
			else
			{
				TCU_FAIL("Unexpected character");
			}
			break;
	}
}
Exemple #2
0
/*
 * Skips any leading space at cp (via skipSpace), then appends the run of
 * identifier characters that follows to name and terminates the string.
 * name may end up empty when no identifier character follows.
 */
static void collectFalconName (vString *const name, const unsigned char *cp)
{
    for (cp = skipSpace (cp); isIdentifierChar ((int) *cp); ++cp)
        vStringPut (name, (int) *cp);
    vStringTerminate (name);
}

/*
 * Scans the input line by line and emits one tag for every line that begins
 * with "function", "class" or "load" (kinds K_FUNCTION, K_CLASS and
 * K_NAMESPACE respectively). The tag name is the identifier following the
 * keyword. Lines whose first character is '#' are skipped. Keywords are only
 * recognised at the very start of a line.
 */
static void findFalconTags (void)
{
    vString *name = vStringNew ();
    const unsigned char *line = fileReadLine ();

    for (; line != NULL; line = fileReadLine ())
    {
        const char *const text = (const char*) line;

        if (*line == '#')
            continue;

        if (strncmp (text, "function", (size_t) 8) == 0)
        {
            collectFalconName (name, line + 8);
            makeSimpleTag (name, FalconKinds, K_FUNCTION);
            vStringClear (name);
        }
        else if (strncmp (text, "class", (size_t) 5) == 0)
        {
            collectFalconName (name, line + 5);
            makeSimpleTag (name, FalconKinds, K_CLASS);
            vStringClear (name);
        }
        else if (strncmp (text, "load", (size_t) 4) == 0)
        {
            collectFalconName (name, line + 4);
            makeSimpleTag (name, FalconKinds, K_NAMESPACE);
            vStringClear (name);
        }
    }
    vStringDelete (name);
}
// Decides whether the current token ends between currChar and nextChar.
//
// That is the case when the next character is whitespace, when either
// character is a single-character token, when exactly one of the two is an
// identifier character (a transition into or out of an identifier), or when
// isFileEnd() reports true. The checks are evaluated in that order and stop
// at the first one that holds.
bool Toker::isTokEnd()
{
	return isspace(nextChar)
		|| isSingleCharTok(nextChar)
		|| isSingleCharTok(currChar)
		|| (isIdentifierChar(currChar) && !isIdentifierChar(nextChar))
		|| (!isIdentifierChar(currChar) && isIdentifierChar(nextChar))
		|| isFileEnd();
}
Exemple #4
0
/*
 * Rewrite the NUL-terminated identifier idstr in place:
 *   - the first alphabetic character found is upper-cased;
 *   - any character rejected by isIdentifierChar() becomes '_';
 *   - every other character is lower-cased.
 * Characters that precede the first alphabetic one go through the last two
 * rules like any other. The string length never changes.
 */
void
NormalizeIdentifier(char *idstr)
{
    char *ptr = idstr;
    int needCap = 1;
    while (*ptr) {
        /*
         * The <ctype.h> functions are only defined for EOF and values
         * representable as unsigned char; a plain char holding a byte
         * >= 0x80 may be negative, so convert before calling them.
         */
        unsigned char ch = (unsigned char)*ptr;

        if (needCap && isalpha(ch)) {
            *ptr = (char)toupper(ch);
            needCap = 0;
        } else if (!isIdentifierChar(*ptr)) {
            *ptr = '_';
        } else {
            *ptr = (char)tolower(ch);
        }
        ptr++;
    }
}
Exemple #5
0
/*
 * Report whether an identifier is unsuitable for use as-is in C output.
 *
 * Returns true when:
 *   - name exactly matches an entry found in the ckeywords table, or
 *   - name is at least 3 characters long and either contains a character
 *     rejected by isIdentifierChar() (e.g. a space) or ends in "_t".
 * Names shorter than 3 characters that are not keywords are always accepted;
 * their characters are not inspected.
 *
 * This function only tests; it does not modify name.
 */
bool
Is_C_Reserved(const char *name)
{
    Symbol *match = FindSymbol(&ckeywords, name);
    size_t len;
    size_t i;

    /*
     * The strcmp insists on an exact spelling match even when the table
     * lookup succeeded (presumably the lookup can be looser than strcmp,
     * e.g. case-insensitive -- confirm against FindSymbol).
     */
    if (match != NULL && strcmp(name, match->name) == 0) {
        return true;
    }

    len = strlen(name);
    if (len < 3) {
        return false;
    }

    for (i = 0; i < len; i++) {
        if (!isIdentifierChar(name[i])) {
            return true;
        }
    }

    /* len >= 3 here, so the last two characters always exist */
    return name[len - 2] == '_' && name[len - 1] == 't';
}
Exemple #6
0
/*
 * parse an identifier
 *
 * Reads identifier characters from L (optionally prepending "prefix" plus a
 * separator: '.' when gl_gas_dat is set, ':' otherwise) and classifies the
 * resulting name.
 *
 * Returns the token type and stores the associated AST (possibly NULL) in
 * *ast_ptr:
 *   - T_INSTR / T_INSTRMODIFIER for names found in pasmWords while inside a
 *     DAT block;
 *   - the reserved word's own token value for SYM_RESERVED entries of
 *     reservedWords (updating block / indentation state in L as needed);
 *   - T_HWREG for hardware registers;
 *   - T_IDENTIFIER otherwise (including builtins and constants), in which
 *     case the AST takes ownership of the identifier string.
 *
 * The identifier string obtained from flexbuf_get() is released with free()
 * on every path that does not hand it to the AST.
 */
static int
parseIdentifier(LexStream *L, AST **ast_ptr, const char *prefix)
{
    int c;
    struct flexbuf fb;
    Symbol *sym;
    AST *ast = NULL;
    /* column where the identifier started; its first character has
       presumably already been consumed by the caller, hence the -1 */
    int startColumn = L->colCounter - 1;
    char *idstr;

    flexbuf_init(&fb, INCSTR);
    if (prefix) {
        flexbuf_addmem(&fb, prefix, strlen(prefix));
        if (gl_gas_dat) {
            flexbuf_addchar(&fb, '.');
        } else {
            flexbuf_addchar(&fb, ':');
        }
    }
    c = lexgetc(L);
    while (isIdentifierChar(c)) {
        //flexbuf_addchar(&fb, tolower(c));
        flexbuf_addchar(&fb, c);
        c = lexgetc(L);
    }
    // add a trailing 0, and make sure there is room for an extra
    // character in case the name mangling needs it
    flexbuf_addchar(&fb, '\0');
    flexbuf_addchar(&fb, '\0');
    idstr = flexbuf_get(&fb);
    /* the character that ended the identifier belongs to the next token */
    lexungetc(L, c);

    /* check for reserved words */
    if (InDatBlock(L)) {
        sym = FindSymbol(&pasmWords, idstr);
        if (sym) {
            if (sym->type == SYM_INSTR) {
                free(idstr);
                ast = NewAST(AST_INSTR, NULL, NULL);
                ast->d.ptr = sym->val;
                *ast_ptr = ast;
                return T_INSTR;
            }
            if (sym->type == SYM_INSTRMODIFIER) {
                free(idstr);
                ast = NewAST(AST_INSTRMODIFIER, NULL, NULL);
                ast->d.ptr = sym->val;
                *ast_ptr = ast;
                return T_INSTRMODIFIER;
            }
            /*
             * idstr is deliberately still alive here: execution falls
             * through to the reserved-word lookup below, which reads it.
             */
            fprintf(stderr, "Internal error: Unknown pasm symbol type %d\n", sym->type);
        }
    }
    sym = FindSymbol(&reservedWords, idstr);
    if (sym != NULL) {
        if (sym->type == SYM_BUILTIN)
        {
            /* run any parse hooks */
            Builtin *b = (Builtin *)sym->val;
            if (b && b->parsehook) {
                (*b->parsehook)(b);
            }
            goto is_identifier;
        }
        if (sym->type == SYM_CONSTANT
            || sym->type == SYM_FLOAT_CONSTANT)
        {
            goto is_identifier;
        }
        if (sym->type == SYM_RESERVED) {
            free(idstr);
            c = INTVAL(sym);
            /* check for special handling */
            switch(c) {
            case T_PUB:
            case T_PRI:
            case T_DAT:
            case T_OBJ:
            case T_VAR:
            case T_CON:
                /* a new top-level block starts here */
                L->in_block = c;
                L->block_firstline = L->lineCounter;
                //EstablishIndent(L, 1);
                break;
            case T_ASM:
                /* remember the enclosing block so T_ENDASM can restore it */
                if (L->in_block == T_ASM) {
                    fprintf(stderr, "WARNING: ignoring nested asm\n");
                } else {
                    L->save_block = L->in_block;
                }
                L->in_block = c;
                break;
            case T_ENDASM:
                L->in_block = L->save_block;
                break;
            case T_IF:
            case T_IFNOT:
            case T_ELSE:
            case T_ELSEIF:
            case T_ELSEIFNOT:
            case T_REPEAT:
            case T_CASE:
                EstablishIndent(L, startColumn);
                break;
            default:
                break;
            }
            if (!ast)
                ast = GetComments();
            *ast_ptr = ast;
            return c;
        }
        if (sym->type == SYM_HWREG) {
            free(idstr);
            ast = NewAST(AST_HWREG, NULL, NULL);
            ast->d.ptr = sym->val;
            *ast_ptr = ast;
            return T_HWREG;
        }
        /*
         * Unknown symbol type: report it and treat the name as a plain
         * identifier. idstr must still be valid for the code below.
         */
        fprintf(stderr, "Internal error: Unknown symbol type %d\n", sym->type);
    }

is_identifier:
    ast = NewAST(AST_IDENTIFIER, NULL, NULL);
    /* make sure identifiers do not conflict with C keywords */
    if (gl_normalizeIdents || Is_C_Reserved(idstr)) {
        NormalizeIdentifier(idstr);
    }
    /* the AST now owns idstr */
    ast->d.string = idstr;
    *ast_ptr = ast;
    return T_IDENTIFIER;
}
Exemple #7
0
//----------------------------------------------------------------------
// Function:	nextToken()
//
// Description:	Reads the next token from the input into "token".
//
//		Leading white space and '#' comments are skipped. At end
//		of input the token is LEX_EOF_SYM, spelled
//		"<end of string>" or "<end of file>" depending on
//		m_sourceType. Otherwise the token is one of: a punctuation
//		or operator symbol, an '@' keyword, a string (see
//		consumeString()), a "<%" block string (see
//		consumeBlockString()), a function name (identifier
//		immediately followed by '('), an identifier, or
//		LEX_UNKNOWN_SYM for anything unrecognised.
//
//		The line number recorded in the token is the line on which
//		the token starts.
//----------------------------------------------------------------------
void
LexBase::nextToken(LexToken &token)
{
	StringBuffer		spelling;
	bool				found;
	short				funcType;
	short				symbol;

	//--------
	// Skip leading white space
	//--------
	while (m_ch.isSpace()) {
		nextChar();
	}

	//--------
	// Check for EOF.
	//--------
	if (m_atEOF) {
		if (m_sourceType == Configuration::INPUT_STRING) {
			token.reset(LEX_EOF_SYM, m_lineNum, "<end of string>");
		} else {
			token.reset(LEX_EOF_SYM, m_lineNum, "<end of file>");
		}
		return;
	}

	//--------
	// Note the line number at the start of the token
	//--------
	const int	lineNum = m_lineNum;

	//--------
	// Miscellaneous kinds of tokens.
	//--------
	switch (m_ch.c_str()[0]) {
	case '?':
		nextChar();
		if (m_ch == '=') {
			nextChar();
			token.reset(LEX_QUESTION_EQUALS_SYM, lineNum, "?=");
		} else {
			//--------
			// A lone '?'. "spelling" is still empty at this
			// point, so spell the token explicitly (as is done
			// for a lone '<' below).
			//--------
			token.reset(LEX_UNKNOWN_SYM, lineNum, "?");
		}
		return;
	case '!':
		nextChar();
		if (m_ch == '=') {
			nextChar();
			token.reset(LEX_NOT_EQUALS_SYM, lineNum, "!=");
		} else {
			token.reset(LEX_NOT_SYM, lineNum, "!");
		}
		return;
	case '@':
		//--------
		// '@' followed by keyword characters: look it up in the
		// keyword table.
		//--------
		spelling.append(m_ch.c_str());
		nextChar();
		while (!m_atEOF && isKeywordChar(m_ch)) {
			spelling.append(m_ch.c_str());
			nextChar();
		}
		searchForKeyword(spelling.c_str(), found, symbol);
		if (found) {
			token.reset(symbol, lineNum, spelling.c_str());
		} else {
			token.reset(LEX_UNKNOWN_SYM, lineNum, spelling.c_str());
		}
		return;
	case '+':
		nextChar();
		token.reset(LEX_PLUS_SYM, lineNum, "+");
		return;
	case '&':
		nextChar();
		if (m_ch == '&') {
			nextChar();
			token.reset(LEX_AND_SYM, lineNum, "&&");
		} else {
			//--------
			// The spelling includes the character after '&',
			// but that character is not consumed.
			//--------
			spelling << '&' << m_ch.c_str();
			token.reset(LEX_UNKNOWN_SYM, lineNum, spelling.c_str());
		}
		return;
	case '|':
		nextChar();
		if (m_ch == '|') {
			nextChar();
			token.reset(LEX_OR_SYM, lineNum, "||");
		} else {
			//--------
			// As for '&': spell it with the following character,
			// which is left unconsumed.
			//--------
			spelling << '|' << m_ch.c_str();
			token.reset(LEX_UNKNOWN_SYM, lineNum, spelling.c_str());
		}
		return;
	case '=':
		nextChar();
		if (m_ch == '=') {
			nextChar();
			token.reset(LEX_EQUALS_EQUALS_SYM, lineNum, "==");
		} else {
			token.reset(LEX_EQUALS_SYM, lineNum, "=");
		}
		return;
	case ';':
		nextChar();
		token.reset(LEX_SEMICOLON_SYM, lineNum, ";");
		return;
	case '[':
		nextChar();
		token.reset(LEX_OPEN_BRACKET_SYM, lineNum, "[");
		return;
	case ']':
		nextChar();
		token.reset(LEX_CLOSE_BRACKET_SYM, lineNum, "]");
		return;
	case '{':
		nextChar();
		token.reset(LEX_OPEN_BRACE_SYM, lineNum, "{");
		return;
	case '}':
		nextChar();
		token.reset(LEX_CLOSE_BRACE_SYM, lineNum, "}");
		return;
	case '(':
		nextChar();
		token.reset(LEX_OPEN_PAREN_SYM, lineNum, "(");
		return;
	case ')':
		nextChar();
		token.reset(LEX_CLOSE_PAREN_SYM, lineNum, ")");
		return;
	case ',':
		nextChar();
		token.reset(LEX_COMMA_SYM, lineNum, ",");
		return;
	case '"':
		consumeString(token);
		return;
	case '<':
		//--------
		// Only "<%" (start of a block string) is meaningful.
		//--------
		nextChar();
		if (m_ch != '%') {
			token.reset(LEX_UNKNOWN_SYM, lineNum, "<");
			return;
		}
		nextChar(); // skip over '%'
		consumeBlockString(token);
		return;
	case '#':
		//--------
		// A comment. Consume it and immediately following
		// comments (without resorting to recursion).
		//--------
		while (m_ch == '#') {
			//--------
			// Skip to the end of line
			//--------
			while (!m_atEOF && m_ch != '\n') {
				nextChar();
			}
			if (m_ch == '\n') {
				nextChar();
			}
			//--------
			// Skip leading white space on the next line
			//--------
			while (m_ch.isSpace()) {
				nextChar();
			}
			//--------
			// Potentially loop around again to consume
			// more comment lines that follow immediately.
			//--------
		}
		//--------
		// Now use (a guaranteed single level of) recursion
		// to obtain the next (non-comment) token.
		//--------
		nextToken(token);
		return;
	}

	//--------
	// Is it a function or identifier?
	//--------
	if (isIdentifierChar(m_ch)) {
		//--------
		// Consume all the identifier characters
		// but not an immediately following "(", if any
		//--------
		spelling.append(m_ch.c_str());
		nextChar();
		while (!m_atEOF && isIdentifierChar(m_ch)) {
			spelling.append(m_ch.c_str());
			nextChar();
		}

		//--------
		// If "(" follows immediately then it is (supposed to be)
		// a function.
		//--------
		if (m_ch == '(') {
			spelling.append(m_ch.c_str());
			nextChar();
			searchForFunction(spelling.c_str(), found, funcType, symbol);
			if (found) {
				token.reset(symbol, lineNum, spelling.c_str(), funcType);
			} else {
				token.reset(LEX_UNKNOWN_FUNC_SYM, lineNum, spelling.c_str());
			}
			return;
		}

		//--------
		// It's not a function so it looks like an identifier.
		// Better check it's a legal identifier.
		//--------
		if (strcmp(spelling.c_str(), ".") == 0) {
			token.reset(LEX_SOLE_DOT_IDENT_SYM, lineNum, spelling.c_str());
		} else if (strstr(spelling.c_str(), "..") != 0) {
			token.reset(LEX_TWO_DOTS_IDENT_SYM, lineNum, spelling.c_str());
		} else {
			//--------
			// expand() signals a problem with the identifier by
			// throwing; report that as an illegal identifier.
			//--------
			try {
				m_uidIdentifierProcessor->expand(spelling);
				token.resetWithOwnership(LEX_IDENT_SYM, lineNum, spelling);
			} catch (const ConfigurationException &) {
				token.resetWithOwnership(LEX_ILLEGAL_IDENT_SYM, lineNum,
				                         spelling);
			}
		}
		return;
	}

	//--------
	// None of the above
	//--------
	spelling << m_ch.c_str();
	nextChar();
	token.reset(LEX_UNKNOWN_SYM, lineNum, spelling.c_str());
}
Exemple #8
0
// Skips any leading space at cp (via skipSpace), then appends the run of
// identifier characters that follows to name. name may end up empty when
// no identifier character follows.
static void readFalconIdentifier (vString *const name, const unsigned char *cp)
{
    for (cp = skipSpace (cp); isIdentifierChar ((int) *cp); ++cp)
        vStringPut (name, (int) *cp);
}

// Scans the input line by line and emits one tag for every line that
// begins with one of the recognised keywords; the tag name is the
// identifier following the keyword:
//   "function"     -> K_FUNCTION
//   "class"        -> K_CLASS
//   "load"         -> K_NAMESPACE
//   "import from"  -> K_NAMESPACE
// Keywords are only recognised at the very start of a line.
static void findFalconTags (void)
{
    vString *name = vStringNew ();
    const unsigned char *line;

    while ((line = readLineFromInputFile ()) != NULL)
    {
        const unsigned char *cp = line;

        // Skip lines starting with # which in falcon
        // would only be the "crunch bang" statement
        if (*cp == '#')
            continue;

        if (strncmp ((const char*) cp, "function", (size_t) 8) == 0)
        {
            readFalconIdentifier (name, cp + 8);
            makeSimpleTag (name, FalconKinds, K_FUNCTION);
            vStringClear (name);
        }
        else if (strncmp ((const char*) cp, "class", (size_t) 5) == 0)
        {
            readFalconIdentifier (name, cp + 5);
            makeSimpleTag (name, FalconKinds, K_CLASS);
            vStringClear (name);
        }
        else if (strncmp ((const char*) cp, "load", (size_t) 4) == 0)
        {
            readFalconIdentifier (name, cp + 4);
            makeSimpleTag (name, FalconKinds, K_NAMESPACE);
            vStringClear (name);
        }
        else if (strncmp ((const char*) cp, "import from", (size_t) 11) == 0)
        {
            // Advance by exactly the 11 characters that were compared.
            // Stepping 12 would skip a character that was never checked,
            // and would move past the terminating NUL when the line is
            // just "import from". The separating space is consumed by
            // skipSpace inside the helper.
            readFalconIdentifier (name, cp + 11);
            makeSimpleTag (name, FalconKinds, K_NAMESPACE);
            vStringClear (name);
        }
    }
    vStringDelete (name);
}