/* skip a comment - used while scanning */ void LexSkipComment(struct LexState *Lexer, char NextChar, enum LexToken *ReturnToken) { if (NextChar == '*') { /* conventional C comment */ while (Lexer->Pos != Lexer->End && (*(Lexer->Pos-1) != '*' || *Lexer->Pos != '/')) { if (*Lexer->Pos == '\n') Lexer->EmitExtraNewlines++; LEXER_INC(Lexer); } if (Lexer->Pos != Lexer->End) LEXER_INC(Lexer); Lexer->Mode = LexModeNormal; } else { /* C++ style comment */ while (Lexer->Pos != Lexer->End && *Lexer->Pos != '\n') LEXER_INC(Lexer); } }
/* get a reserved word or identifier - used while scanning */ enum LexToken LexGetWord(struct LexState *Lexer, struct Value *Value) { const char *StartPos = Lexer->Pos; enum LexToken Token; do { LEXER_INC(Lexer); } while (Lexer->Pos != Lexer->End && isCident((int)*Lexer->Pos)); Value->Typ = NULL; Value->Val->Identifier = TableStrRegister2(StartPos, Lexer->Pos - StartPos); Token = LexCheckReservedWord(Value->Val->Identifier); switch (Token) { case TokenHashInclude: Lexer->Mode = LexModeHashInclude; break; case TokenHashDefine: Lexer->Mode = LexModeHashDefine; break; default: break; } if (Token != TokenNone) return Token; if (Lexer->Mode == LexModeHashDefineSpace) Lexer->Mode = LexModeHashDefineSpaceIdent; return TokenIdentifier; }
/* get a character constant - used while scanning */ enum LexToken LexGetCharacterConstant(struct LexState *Lexer, struct Value *Value) { Value->Typ = &CharType; Value->Val->Character = LexUnEscapeCharacter(&Lexer->Pos, Lexer->End); if (Lexer->Pos != Lexer->End && *Lexer->Pos != '\'') LexFail(Lexer, "expected \"'\""); LEXER_INC(Lexer); return TokenCharacterConstant; }
/* get a single token from the source - used while scanning */ enum LexToken LexScanGetToken(struct LexState *Lexer, struct Value **Value) { char ThisChar; char NextChar; enum LexToken GotToken = TokenNone; /* handle cases line multi-line comments or string constants which mess up the line count */ if (Lexer->EmitExtraNewlines > 0) { Lexer->EmitExtraNewlines--; return TokenEndOfLine; } /* scan for a token */ do { *Value = &LexValue; while (Lexer->Pos != Lexer->End && isspace((int)*Lexer->Pos)) { if (*Lexer->Pos == '\n') { Lexer->Line++; Lexer->Pos++; Lexer->Mode = LexModeNormal; Lexer->CharacterPos = 0; return TokenEndOfLine; } else if (Lexer->Mode == LexModeHashDefine || Lexer->Mode == LexModeHashDefineSpace) Lexer->Mode = LexModeHashDefineSpace; else if (Lexer->Mode == LexModeHashDefineSpaceIdent) Lexer->Mode = LexModeNormal; LEXER_INC(Lexer); } if (Lexer->Pos == Lexer->End || *Lexer->Pos == '\0') return TokenEOF; ThisChar = *Lexer->Pos; if (isCidstart((int)ThisChar)) return LexGetWord(Lexer, *Value); if (isdigit((int)ThisChar)) return LexGetNumber(Lexer, *Value); NextChar = (Lexer->Pos+1 != Lexer->End) ? 
*(Lexer->Pos+1) : 0; LEXER_INC(Lexer); switch (ThisChar) { case '"': GotToken = LexGetStringConstant(Lexer, *Value, '"'); break; case '\'': GotToken = LexGetCharacterConstant(Lexer, *Value); break; case '(': if (Lexer->Mode == LexModeHashDefineSpaceIdent) GotToken = TokenOpenMacroBracket; else GotToken = TokenOpenBracket; Lexer->Mode = LexModeNormal; break; case ')': GotToken = TokenCloseBracket; break; case '=': NEXTIS('=', TokenEqual, TokenAssign); break; case '+': NEXTIS3('=', TokenAddAssign, '+', TokenIncrement, TokenPlus); break; case '-': NEXTIS4('=', TokenSubtractAssign, '>', TokenArrow, '-', TokenDecrement, TokenMinus); break; case '*': NEXTIS('=', TokenMultiplyAssign, TokenAsterisk); break; case '/': if (NextChar == '/' || NextChar == '*') { LEXER_INC(Lexer); LexSkipComment(Lexer, NextChar, &GotToken); } else NEXTIS('=', TokenDivideAssign, TokenSlash); break; case '%': NEXTIS('=', TokenModulusAssign, TokenModulus); break; case '<': if (Lexer->Mode == LexModeHashInclude) GotToken = LexGetStringConstant(Lexer, *Value, '>'); else { NEXTIS3PLUS('=', TokenLessEqual, '<', TokenShiftLeft, '=', TokenShiftLeftAssign, TokenLessThan); } break; case '>': NEXTIS3PLUS('=', TokenGreaterEqual, '>', TokenShiftRight, '=', TokenShiftRightAssign, TokenGreaterThan); break; case ';': GotToken = TokenSemicolon; break; case '&': NEXTIS3('=', TokenArithmeticAndAssign, '&', TokenLogicalAnd, TokenAmpersand); break; case '|': NEXTIS3('=', TokenArithmeticOrAssign, '|', TokenLogicalOr, TokenArithmeticOr); break; case '{': GotToken = TokenLeftBrace; break; case '}': GotToken = TokenRightBrace; break; case '[': GotToken = TokenLeftSquareBracket; break; case ']': GotToken = TokenRightSquareBracket; break; case '!': NEXTIS('=', TokenNotEqual, TokenUnaryNot); break; case '^': NEXTIS('=', TokenArithmeticExorAssign, TokenArithmeticExor); break; case '~': GotToken = TokenUnaryExor; break; case ',': GotToken = TokenComma; break; case '.': NEXTISEXACTLY3('.', '.', TokenEllipsis, TokenDot); 
break; case '?': GotToken = TokenQuestionMark; break; case ':': GotToken = TokenColon; break; default: LexFail(Lexer, "illegal character '%c' is %d", ThisChar, ThisChar); break; } } while (GotToken == TokenNone); return GotToken; }
/* get a string constant - used while scanning */ enum LexToken LexGetStringConstant(struct LexState *Lexer, struct Value *Value, char EndChar) { int Escape = FALSE; const char *StartPos = Lexer->Pos; const char *EndPos; char *EscBuf; char *EscBufPos; char *RegString; struct Value *ArrayValue; while (Lexer->Pos != Lexer->End && (*Lexer->Pos != EndChar || Escape)) { /* find the end */ if (Escape) { if (*Lexer->Pos == '\r' && Lexer->Pos+1 != Lexer->End) Lexer->Pos++; if (*Lexer->Pos == '\n' && Lexer->Pos+1 != Lexer->End) { Lexer->Line++; Lexer->Pos++; Lexer->CharacterPos = 0; Lexer->EmitExtraNewlines++; } Escape = FALSE; } else if (*Lexer->Pos == '\\') Escape = TRUE; LEXER_INC(Lexer); } EndPos = Lexer->Pos; EscBuf = HeapAllocStack(EndPos - StartPos); if (EscBuf == NULL) LexFail(Lexer, "out of memory"); for (EscBufPos = EscBuf, Lexer->Pos = StartPos; Lexer->Pos != EndPos;) *EscBufPos++ = LexUnEscapeCharacter(&Lexer->Pos, EndPos); /* try to find an existing copy of this string literal */ RegString = TableStrRegister2(EscBuf, EscBufPos - EscBuf); HeapPopStack(EscBuf, EndPos - StartPos); ArrayValue = VariableStringLiteralGet(RegString); if (ArrayValue == NULL) { /* create and store this string literal */ ArrayValue = VariableAllocValueAndData(NULL, 0, FALSE, NULL, TRUE); ArrayValue->Typ = CharArrayType; ArrayValue->Val = (union AnyValue *)RegString; VariableStringLiteralDefine(RegString, ArrayValue); } /* create the the pointer for this char* */ Value->Typ = CharPtrType; Value->Val->Pointer = RegString; if (*Lexer->Pos == EndChar) LEXER_INC(Lexer); return TokenStringConstant; }
/* get a numeric literal - used while scanning */ enum LexToken LexGetNumber(struct LexState *Lexer, struct Value *Value) { int Result = 0; int Base = 10; enum LexToken ResultToken; #ifndef NO_FP double FPResult; double FPDiv; #endif if (*Lexer->Pos == '0') { /* a binary, octal or hex literal */ LEXER_INC(Lexer); if (Lexer->Pos != Lexer->End) { if (*Lexer->Pos == 'x' || *Lexer->Pos == 'X') { Base = 16; LEXER_INC(Lexer); } else if (*Lexer->Pos == 'b' || *Lexer->Pos == 'B') { Base = 2; LEXER_INC(Lexer); } else if (*Lexer->Pos != '.') Base = 8; } } /* get the value */ for (; Lexer->Pos != Lexer->End && IS_BASE_DIGIT(*Lexer->Pos, Base); LEXER_INC(Lexer)) Result = Result * Base + GET_BASE_DIGIT(*Lexer->Pos); if (Result >= 0 && Result <= MAX_CHAR_VALUE) { Value->Typ = &CharType; Value->Val->Character = Result; ResultToken = TokenCharacterConstant; } else { Value->Typ = &IntType; Value->Val->Integer = Result; ResultToken = TokenIntegerConstant; } if (Lexer->Pos == Lexer->End) return ResultToken; if (*Lexer->Pos == 'l' || *Lexer->Pos == 'L') { LEXER_INC(Lexer); return ResultToken; } #ifndef NO_FP if (Lexer->Pos == Lexer->End || *Lexer->Pos != '.') return ResultToken; Value->Typ = &FPType; LEXER_INC(Lexer); for (FPDiv = 1.0/Base, FPResult = (double)Result; Lexer->Pos != Lexer->End && IS_BASE_DIGIT(*Lexer->Pos, Base); LEXER_INC(Lexer), FPDiv /= (double)Base) FPResult += GET_BASE_DIGIT(*Lexer->Pos) * FPDiv; if (Lexer->Pos != Lexer->End && (*Lexer->Pos == 'e' || *Lexer->Pos == 'E')) { double ExponentMultiplier = 1.0; LEXER_INC(Lexer); if (Lexer->Pos != Lexer->End && *Lexer->Pos == '-') { ExponentMultiplier = -1.0; LEXER_INC(Lexer); } for (Result = 0; Lexer->Pos != Lexer->End && IS_BASE_DIGIT(*Lexer->Pos, Base); LEXER_INC(Lexer)) Result = Result * (double)Base + GET_BASE_DIGIT(*Lexer->Pos); FPResult *= pow((double)Base, (double)Result * ExponentMultiplier); } Value->Val->FP = FPResult; return TokenFPConstant; #else return ResultToken; #endif }
/*
 * get a numeric literal - used while scanning
 *
 * pc    - interpreter instance supplying the LongType and FPType descriptors
 * Lexer - scanner state; Pos is expected to point at the first digit
 * Value - receives the type and value of the literal
 *
 * A leading 0 selects hexadecimal (0x), binary (0b) or octal, unless it is
 * directly followed by '.'. One optional 'u'/'U' and one optional 'l'/'L'
 * suffix are consumed and otherwise ignored. Integer literals are returned
 * as TokenIntegerConstant with a long value. Unless NO_FP is defined, a
 * following '.' or exponent turns the literal into a TokenFPConstant, and
 * a trailing 'f'/'F' is consumed.
 */
enum LexToken LexGetNumber(Picoc *pc, struct LexState *Lexer, struct Value *Value)
{
    long Result = 0;
    long Base = 10;
    enum LexToken ResultToken;
#ifndef NO_FP
    double FPResult;
    double FPDiv;
#endif
    /* long/unsigned flags */
#if 0 /* unused for now */
    char IsLong = 0;
    char IsUnsigned = 0;
#endif

    if (*Lexer->Pos == '0')
    {
        /* a binary, octal or hex literal */
        LEXER_INC(Lexer);
        if (Lexer->Pos != Lexer->End)
        {
            if (*Lexer->Pos == 'x' || *Lexer->Pos == 'X')
            {
                Base = 16;
                LEXER_INC(Lexer);
            }
            else if (*Lexer->Pos == 'b' || *Lexer->Pos == 'B')
            {
                Base = 2;
                LEXER_INC(Lexer);
            }
            else if (*Lexer->Pos != '.')
                Base = 8;
        }
    }

    /* get the value */
    for (; Lexer->Pos != Lexer->End && IS_BASE_DIGIT(*Lexer->Pos, Base); LEXER_INC(Lexer))
        Result = Result * Base + GET_BASE_DIGIT(*Lexer->Pos);

    /* optional suffixes. the digits may have run up to End, so check the
     * position before looking at the character */
    if (Lexer->Pos != Lexer->End && (*Lexer->Pos == 'u' || *Lexer->Pos == 'U'))
    {
        LEXER_INC(Lexer);
        /* IsUnsigned = 1; */
    }

    if (Lexer->Pos != Lexer->End && (*Lexer->Pos == 'l' || *Lexer->Pos == 'L'))
    {
        LEXER_INC(Lexer);
        /* IsLong = 1; */
    }

    Value->Typ = &pc->LongType; /* ignored? */
    Value->Val->LongInteger = Result;

    ResultToken = TokenIntegerConstant;

    if (Lexer->Pos == Lexer->End)
        return ResultToken;

#ifndef NO_FP
    /* only a '.' or an exponent marker continues the literal as floating point */
    if (*Lexer->Pos != '.' && *Lexer->Pos != 'e' && *Lexer->Pos != 'E')
        return ResultToken;

    Value->Typ = &pc->FPType;
    FPResult = (double)Result;

    if (*Lexer->Pos == '.')
    {
        LEXER_INC(Lexer);

        /* fractional digits, each weighted by a successively smaller power of the base */
        for (FPDiv = 1.0/Base; Lexer->Pos != Lexer->End && IS_BASE_DIGIT(*Lexer->Pos, Base); LEXER_INC(Lexer), FPDiv /= (double)Base)
            FPResult += GET_BASE_DIGIT(*Lexer->Pos) * FPDiv;
    }

    if (Lexer->Pos != Lexer->End && (*Lexer->Pos == 'e' || *Lexer->Pos == 'E'))
    {
        int ExponentSign = 1;

        LEXER_INC(Lexer);

        /* the exponent may carry an explicit sign; without the '+' case a
         * literal such as 1.5e+3 would end at the 'e' and leave "+3" behind */
        if (Lexer->Pos != Lexer->End && *Lexer->Pos == '-')
        {
            ExponentSign = -1;
            LEXER_INC(Lexer);
        }
        else if (Lexer->Pos != Lexer->End && *Lexer->Pos == '+')
            LEXER_INC(Lexer);

        Result = 0;
        while (Lexer->Pos != Lexer->End && IS_BASE_DIGIT(*Lexer->Pos, Base))
        {
            Result = Result * Base + GET_BASE_DIGIT(*Lexer->Pos);
            LEXER_INC(Lexer);
        }

        FPResult *= pow((double)Base, (double)Result * ExponentSign);
    }

    Value->Val->FP = FPResult;

    /* optional float suffix; Pos may already be at End */
    if (Lexer->Pos != Lexer->End && (*Lexer->Pos == 'f' || *Lexer->Pos == 'F'))
        LEXER_INC(Lexer);

    return TokenFPConstant;
#else
    return ResultToken;
#endif
}