/
lexer.cpp
177 lines (160 loc) · 5.31 KB
/
lexer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#include "calcLex.h"
// Text of the token currently being scanned. It is filled one character at a
// time by calcTextAppend(), reset by calcTextClear(), and always kept
// NUL-terminated. It is not declared static, so it has external linkage.
char calcText[CALCTEXT_MAX];
// Index in calcText of the terminating NUL, i.e. the slot the next appended
// character will occupy. At file scope `static` gives this variable internal
// linkage (private to this translation unit); as a static-storage object it
// is zero-initialized before the program starts.
static int calcTextPos;
// Input stream shared by the lexer routines in this file: calcLexOpen()
// attaches it to a file, calcLex() and followingChars() read from it.
std::ifstream filestream;

// Opens `filename` for reading as the lexer's input.
//
// filename: NUL-terminated path of the file to scan; a null pointer is
//           rejected.
// returns:  true when the stream is attached to the requested file,
//           false when the file could not be opened.
//
// The function may be called again to switch to another file: any file that
// is still open is closed first and stale error flags are dropped.
bool calcLexOpen(const char filename[])
{
    if (!filename)
    {
        return false;
    }
    // open() on a stream that already has a file fails and sets failbit while
    // is_open() keeps reporting the OLD file, so success would be reported for
    // a stream that can no longer be read. Detach the old file first.
    if (filestream.is_open())
    {
        filestream.close();
    }
    // Discard eofbit/failbit left behind by a previous file or a failed open.
    filestream.clear();
    filestream.open(filename, std::ios_base::in);
    return filestream.is_open();
}
// Empties the token text buffer: calcText becomes the empty string and the
// write position goes back to the start of the buffer.
void calcTextClear()
{
    calcText[0] = '\0';
    calcTextPos = 0;
}
// Adds one character to the end of the token text in calcText and keeps the
// buffer NUL-terminated. When the buffer has no room left for another
// character plus the terminator, the character is silently dropped.
void calcTextAppend(int currentChar)
{
    if (calcTextPos < (CALCTEXT_MAX - 1))
    {
        calcText[calcTextPos] = static_cast<char>(currentChar);
        ++calcTextPos;
        calcText[calcTextPos] = '\0';
    }
}
// True when `c` is an ASCII letter. EOF and every other value yield false.
static bool calcIsLetter(int c)
{
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

// True when `c` is an ASCII decimal digit. EOF and every other value yield false.
static bool calcIsDigit(int c)
{
    return c >= '0' && c <= '9';
}

// Moves characters from the input to calcText for as long as `accept` approves
// of the next one. peek() is used so that the first rejected character (or
// EOF) is never taken out of the stream and nothing has to be pushed back.
static void calcAppendWhile(bool (*accept)(int))
{
    while (accept(filestream.peek()))
    {
        calcTextAppend(filestream.get());
    }
}

// Scans the next token from `filestream` and returns its token code.
//
// Whitespace (space, tab, newline, carriage return) and comments are skipped.
// A comment starts with "/*" and ends at "*/", at the end of the line, or at
// end of file, whichever comes first.
//
// returns:
//   endOfFileSym          - no more input
//   assignSym             - ":="
//   readSym / writeSym    - the words "read" / "write" (not followed by a letter)
//   identifier            - a run of ASCII letters
//   numConst              - digits, optionally followed by '.' and more digits
//   addOp / multOp        - '+' '-' / '*' '/'
//   leftParen / rightParen
//   10                    - digits followed by '.' with no digit after it
//                           NOTE(review): bare literal kept from the original;
//                           presumably an error code - confirm against the parser
//   otherwise the unrecognised character itself (converted from char to int).
//
// On return calcText holds the text of the token (empty at end of file).
int calcLex()
{
    for (;;) // left only through a return statement
    {
        // forget the text of the previous token / skipped comment
        calcTextClear();

        // get() returns an int so that EOF can be told apart from every real
        // byte; keeping it in a char would confuse byte 0xFF with EOF, or hide
        // EOF altogether where char is unsigned.
        int current = filestream.get();
        while (current == ' ' || current == '\t' || current == '\n' || current == '\r')
        {
            current = filestream.get();
        }

        if (current == EOF)
        {
            return endOfFileSym;
        }

        const char first = static_cast<char>(current);

        // comment: skip up to "*/" or the end of the line
        if (followingChars("/*", first))
        {
            for (;;)
            {
                current = filestream.get();
                // stopping at EOF keeps an unterminated comment on the last
                // line from spinning forever
                if (current == EOF || current == '\n')
                {
                    break;
                }
                if (followingChars("*/", static_cast<char>(current)))
                {
                    break;
                }
            }
            // start over with whatever follows the comment
            continue;
        }

        if (followingChars(":=", first))
        {
            return assignSym;
        }

        // "read" / "write" are keywords only when they are whole words; when
        // more letters follow (e.g. "reader") the text is one identifier.
        // followingChars() has already put the keyword into calcText, so the
        // remaining letters are simply appended.
        if (followingChars("read", first))
        {
            if (!calcIsLetter(filestream.peek()))
            {
                return readSym;
            }
            calcAppendWhile(calcIsLetter);
            return identifier;
        }
        if (followingChars("write", first))
        {
            if (!calcIsLetter(filestream.peek()))
            {
                return writeSym;
            }
            calcAppendWhile(calcIsLetter);
            return identifier;
        }

        // Must come after the followingChars() checks: a failed check clears
        // calcText and a successful one has already stored the characters.
        calcTextAppend(first);

        // identifiers consist of letters only
        if (calcIsLetter(current))
        {
            calcAppendWhile(calcIsLetter);
            return identifier;
        }

        // numbers: digits, then optionally '.' followed by at least one digit
        if (calcIsDigit(current))
        {
            calcAppendWhile(calcIsDigit);
            if (filestream.peek() == '.')
            {
                calcTextAppend(filestream.get());
                if (!calcIsDigit(filestream.peek()))
                {
                    // The character after '.' stays in the stream, so the
                    // next call still sees it.
                    return 10;
                }
                calcAppendWhile(calcIsDigit);
            }
            return numConst;
        }

        if (first == '+' || first == '-')
        {
            return addOp;
        }
        if (first == '*' || first == '/')
        {
            return multOp;
        }
        if (first == '(')
        {
            return leftParen;
        }
        if (first == ')')
        {
            return rightParen;
        }

        // not a character the scanner knows: hand it back as its own code
        return first;
    }
}
// Checks whether the input continues with `target`, given that the caller has
// already read the first character from `filestream` and passes it in as
// `currentChar`.
//
// target:      the text to look for; an empty target matches trivially and
//              leaves both the stream and calcText untouched.
// currentChar: the character most recently taken from the stream.
// returns:     true on a full match, false otherwise.
//
// On a match the characters of `target` have been appended to calcText and
// the stream is positioned just after the last character of `target`.
// On a mismatch calcText is cleared and every character this function took
// from the stream is pushed back, so reading resumes right after
// `currentChar`.
bool followingChars(string target, char currentChar)
{
    if (target.empty())
    {
        return true;
    }
    if (currentChar != target[0])
    {
        calcTextClear();
        return false;
    }
    calcTextAppend(currentChar);

    // number of characters taken from the stream here (currentChar excluded)
    string::size_type consumed = 0;
    for (string::size_type i = 1; i < target.length(); ++i)
    {
        // peek() leaves a non-matching character - or end of file - in the
        // stream, so `consumed` counts exactly what has to be given back.
        if (filestream.peek() != static_cast<unsigned char>(target[i]))
        {
            calcTextClear();
            if (consumed > 0)
            {
                // peek() at end of file may have raised eofbit; characters
                // cannot be pushed back while the stream is not good().
                filestream.clear();
                // Like the original, this relies on the stream buffer
                // accepting more than one pushed-back character.
                for (; consumed > 0; --consumed)
                {
                    filestream.unget();
                }
            }
            return false;
        }
        calcTextAppend(filestream.get());
        ++consumed;
    }
    return true;
}