-
Notifications
You must be signed in to change notification settings - Fork 0
/
Token.hpp
296 lines (273 loc) · 10.7 KB
/
Token.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
/**
* Defines a token.
*
* @author Jason Young
* @version 0.1
*/
#ifndef TOKEN_HPP
#define TOKEN_HPP
//Local includes:
#include "Utils/Optional.hpp"
//System includes:
#include <memory>
#include <string>
#include <vector>
class Node;

/**
 * A single token read from TEBNF source code.
 *
 * Holds the token's text, its classified type and bookkeeping data (line
 * number, container index), and offers a family of cheap predicates for
 * classifying the token. Operator precedence and associativity are derived
 * from the token text on demand rather than stored.
 */
struct Token
{
  /** Value returned by precedence() when a token has no operator precedence. */
  static const size_t NO_PRECEDENCE = 99999;

  /** Node associated with this token; null until assigned.
   * NOTE(review): ownership is not visible from this header - presumably
   * non-owning; confirm against the code that sets it. */
  Node* pNode;

  /** Token type. */
  enum TokenType
  {
    TYPE_NONE = 0,
    COMMENT_SINGLE_LINE,
    COMMENT_MULTI_LINE,
    ELEMENT_ACTIONS,
    ELEMENT_GRAMMAR,
    ELEMENT_INPUT,
    ELEMENT_END,
    ELEMENT_NAME,
    ELEMENT_OUTPUT,
    ELEMENT_STATE_TABLE,
    LITERAL_CHR,
    LITERAL_DEC,
    LITERAL_FLT,
    LITERAL_HEX,
    LITERAL_OCT,
    LITERAL_STR,
    RANGE_VALUE,
    /* Beginning of operators. isOperator() relies on every operator type
     * being strictly between START_OF_OPERATORS and END_OF_OPERATORS. */
    START_OF_OPERATORS,
    OPERATOR_COMPARISON,
    OPERATOR_LESS_GREATER_THAN_COMPARISON,
    OPERATOR_AS,
    OPERATOR_ASSIGN,
    OPERATOR_ASSIGN_INCREMENT,
    OPERATOR_ASSIGN_DECREMENT,
    OPERATOR_ASSIGN_MULTIPLY,
    OPERATOR_ASSIGN_DIVIDE,
    OPERATOR_ASSIGN_MODULO,
    OPERATOR_MATH,
    OPERATOR_DECREMENT,
    OPERATOR_INCREMENT,
    OPERATOR_PRODUCTION,
    OPERATOR_ARRAY_LEFT_BRACKET,
    OPERATOR_ARRAY_RIGHT_BRACKET,
    OPERATOR_ELEMENT_MEMBER_ACCESS_DOT,
    OPERATOR_CONCAT,
    OPERATOR_OR,
    OPERATOR_LEFT_PAREN,
    OPERATOR_RIGHT_PAREN,
    OPERATOR_RANGE_LEFT_BRACKET,
    OPERATOR_RANGE_RIGHT_BRACKET,
    OPERATOR_RANGE_DELIM,
    OPERATOR_TYPE_CAST_LEFT_PAREN,
    OPERATOR_TYPE_CAST_RIGHT_PAREN,
    OPERATOR_STATE_TABLE_DELIM,
    OPERATOR_ACTIONS_PARAM_DELIM,
    OPERATOR_TERMINATION,
    END_OF_OPERATORS,
    /* End of operators */
    STATIC_VARIABLE,
    SYMBOL_ACTIONS,
    SYMBOL_GRAMMAR,
    SYMBOL_GRAMMAR_SIZE,
    SYMBOL_CONSOLE,
    SYMBOL_ELEMENT_NAME_ACCESSED, //TRICKY: Should we change this?
    SYMBOL_IO_TYPE,
    SYMBOL_ACTIONS_PARAM,
    SYMBOL_STATE_TABLE,
    /* Start of types set by parser */
    SYMBOL_STATE_TABLE_STATE,
    SYMBOL_STATE_TABLE_INPUT_OR_CONDITION,
    SYMBOL_STATE_TABLE_INPUT_METHOD,
    SYMBOL_STATE_TABLE_NEXT_STATE,
    SYMBOL_STATE_TABLE_OUTPUT_OR_ACTION,
    SYMBOL_STATE_TABLE_OUTPUT_METHOD,
    /* End of types set by parser */
    SYMBOL_TYPED,
    UNKNOWN
  } type, actualType;

  /** Token text. */
  std::string text;

  /** Line number in original TEBNF source code that the token was read from. */
  size_t lineNumber;

  /** Index of this token in its container. */
  size_t index;

  /** Set if this is a symbol accessed using the dot operator. */
  std::shared_ptr<Token> pAccessedElementToken;

  /** Operator associativity. */
  enum OperatorAssociativity { ASSOC_NONE, ASSOC_LEFT, ASSOC_RIGHT };

  /** True if this token is last token comprising the ACTIONS element signature,
   * which may or may not include parameters. */
  bool isActionsLastSignatureToken;

  /** True if this is an unsigned type. */
  bool isUnsigned;

  /** Constructor.
   * @param tokType          initial value of `type`; `actualType` always starts as TYPE_NONE.
   * @param tokText          initial token text.
   * @param tokPrecedence    accepted for interface compatibility but not stored;
   *                         precedence is computed from the text by precedence().
   * @param tokAssociativity accepted for interface compatibility but not stored;
   *                         associativity is computed from the text by associativity().
   */
  Token(TokenType tokType = TYPE_NONE,
        const std::string& tokText = "",
        size_t tokPrecedence = NO_PRECEDENCE,
        OperatorAssociativity tokAssociativity = ASSOC_NONE)
  : pNode(nullptr),
    type(tokType),
    actualType(TYPE_NONE),
    text(tokText),
    lineNumber(0),
    index(0),
    isActionsLastSignatureToken(false),
    isUnsigned(false)
  {
    // Silence unused-parameter warnings; both values are derived on demand.
    (void)tokPrecedence;
    (void)tokAssociativity;
  }

  /** @return true if this token is a single line comment or part of a
   * multi-line comment.
   */
  bool isComment() const { return COMMENT_MULTI_LINE == type || COMMENT_SINGLE_LINE == type; }

  /** Determine if a token is an element.
   * Note that ELEMENT_NAME and ELEMENT_END are not counted as elements here.
   * @return true if this token is an element, false otherwise.
   */
  bool isElement() const
  {
    return ELEMENT_ACTIONS == type || ELEMENT_GRAMMAR == type ||
           ELEMENT_INPUT == type || ELEMENT_OUTPUT == type ||
           ELEMENT_STATE_TABLE == type;
  }

  /** Element check functions.
   * @return true if this token matches the type being checked, false otherwise.
   */
  bool isElementName() const { return ELEMENT_NAME == type; }
  bool isElementGrammar() const { return ELEMENT_GRAMMAR == type; }
  bool isElementInput() const { return ELEMENT_INPUT == type; }
  bool isElementOutput() const { return ELEMENT_OUTPUT == type; }
  bool isElementIo() const { return isElementInput() || isElementOutput(); }
  bool isElementStateTable() const { return ELEMENT_STATE_TABLE == type; }
  bool isElementActions() const { return ELEMENT_ACTIONS == type; }
  bool isElementEnd() const { return ELEMENT_END == type; }

  /** Symbol/variable check functions.
   * @return true if this token matches the type being checked, false otherwise.
   */
  bool isSymbolActions() const { return SYMBOL_ACTIONS == type; }
  bool isSymbolActionsParam() const { return SYMBOL_ACTIONS_PARAM == type; }
  bool isSymbolTyped() const { return SYMBOL_TYPED == type; }
  bool isSymbolTypedUnsigned() const { return isSymbolTyped() && isUnsigned; }
  bool isSymbolGrammar() const { return SYMBOL_GRAMMAR == type; }
  bool isSymbolGrammarSize() const { return SYMBOL_GRAMMAR_SIZE == type; }
  bool isSymbolConsole() const { return SYMBOL_CONSOLE == type; }
  bool isSymbolElementNameAccessed() const { return SYMBOL_ELEMENT_NAME_ACCESSED == type; }
  bool isSymbolIoType() const { return SYMBOL_IO_TYPE == type; }
  bool isSymbolStateTable() const { return SYMBOL_STATE_TABLE == type; }
  bool isSymbolStateTableState() const { return SYMBOL_STATE_TABLE_STATE == type; }
  bool isStaticVariable() const { return STATIC_VARIABLE == type; }
  bool isRangeValue() const { return RANGE_VALUE == type; }

  /** Determine if a token is an operator.
   * @return true if this token is an operator, false otherwise.
   */
  bool isOperator() const { return type > START_OF_OPERATORS && type < END_OF_OPERATORS; }

  /** Operator check functions (based on the token type, not its text).
   * @return true if this token matches the type being checked, false otherwise.
   */
  bool isOperatorMath() const { return OPERATOR_MATH == type; }
  bool isOperatorLeftAssociative() const { return OPERATOR_MATH == type; }
  bool isOperatorRightAssociative() const { return OPERATOR_ASSIGN == type; }
  bool isOperatorLeftArrayBracket() const { return OPERATOR_ARRAY_LEFT_BRACKET == type; }
  bool isOperatorRightArrayBracket() const { return OPERATOR_ARRAY_RIGHT_BRACKET == type; }
  bool isOperatorLeftRangeBracket() const { return OPERATOR_RANGE_LEFT_BRACKET == type; }
  bool isOperatorRightRangeBracket() const { return OPERATOR_RANGE_RIGHT_BRACKET == type; }
  bool isOperatorRangeDelim() const { return OPERATOR_RANGE_DELIM == type; }
  bool isOperatorTypeCastLeftParen() const { return OPERATOR_TYPE_CAST_LEFT_PAREN == type; }
  bool isOperatorTypeCastRightParen() const { return OPERATOR_TYPE_CAST_RIGHT_PAREN == type; }
  bool isOperatorLeftParen() const { return OPERATOR_LEFT_PAREN == type; }
  bool isOperatorRightParen() const { return OPERATOR_RIGHT_PAREN == type; }
  bool isOperatorConcat() const { return OPERATOR_CONCAT == type; }
  bool isOperatorAs() const { return OPERATOR_AS == type; }
  bool isOperatorOr() const { return OPERATOR_OR == type; }
  bool isOperatorComparison() const { return OPERATOR_COMPARISON == type; }
  static bool isOperatorOr(TokenType t) { return OPERATOR_OR == t; }
  bool isOperatorActionsParamDelim() const { return OPERATOR_ACTIONS_PARAM_DELIM == type; }
  bool isOperatorStateTableDelimiter() const { return OPERATOR_STATE_TABLE_DELIM == type; }
  bool isOperatorTermination() const { return OPERATOR_TERMINATION == type; }
  bool isOperatorMemberAccessDot() const { return OPERATOR_ELEMENT_MEMBER_ACCESS_DOT == type; }

  /** Determine if a token type is an assignment operator.
   * @param t token type to check.
   * @param excludeIncDecMulDivMod when true only plain OPERATOR_ASSIGN matches;
   *        when false the compound assignment types match as well.
   * @return true if t is an assignment operator type, false otherwise.
   */
  static bool isOperatorAssignment(TokenType t, bool excludeIncDecMulDivMod = false)
  {
    if(OPERATOR_ASSIGN == t)
      return true;
    if(excludeIncDecMulDivMod)
      return false;
    return OPERATOR_ASSIGN_INCREMENT == t ||
           OPERATOR_ASSIGN_DECREMENT == t ||
           OPERATOR_ASSIGN_MULTIPLY == t ||
           OPERATOR_ASSIGN_DIVIDE == t ||
           OPERATOR_ASSIGN_MODULO == t;
  }

  /** Determine if this token is an assignment operator.
   * @param excludeIncAndDec when true only plain OPERATOR_ASSIGN matches.
   * @return true if this token is an assignment operator, false otherwise.
   */
  bool isOperatorAssignment(bool excludeIncAndDec = false) const
  {
    // Forward the flag; it was previously dropped, so compound assignments
    // always matched regardless of what the caller asked for.
    return Token::isOperatorAssignment(type, excludeIncAndDec);
  }

  /** @return true if the token text is "," or "|" (text based, type is ignored). */
  bool isSeparator() const { return "," == text || "|" == text; }

  /** @return true if this token is any of the LITERAL_* types. */
  bool isLiteral() const
  {
    return LITERAL_CHR == type || LITERAL_DEC == type || LITERAL_FLT == type ||
           LITERAL_HEX == type || LITERAL_OCT == type || LITERAL_STR == type;
  }

  /** @return true if this token is a literal other than LITERAL_STR. */
  bool isLiteralNumber() const { return LITERAL_STR != type && isLiteral(); }

  /** @return true if this token is a string literal. */
  bool isLiteralString() const { return LITERAL_STR == type; }

  /** Unknown/unassigned type check functions.
   */
  bool isTypeUnknown() const { return UNKNOWN == type; }
  bool isTypeNone() const { return TYPE_NONE == type; }
  bool isTypeUnknownOrNone() const { return UNKNOWN == type || TYPE_NONE == type; }

  /** Associativity checks based on the token text (see associativity()). */
  bool isLeftAssociative() const { return ASSOC_LEFT == associativity(); }
  bool isRightAssociative() const { return ASSOC_RIGHT == associativity(); }

  /** Determine the associativity of this token from its text.
   * @return ASSOC_NONE if this token is not an operator or its text is not a
   *         recognized operator spelling, otherwise ASSOC_LEFT or ASSOC_RIGHT.
   */
  Token::OperatorAssociativity associativity() const
  {
    if(!isOperator())
      return Token::ASSOC_NONE;

    // The comma must be compared against the text: a bare "," literal is a
    // non-null pointer and therefore always true.
    const bool leftAssociative =
      "++" == text || "--" == text || "(" == text || ")" == text ||
      "[" == text || "]" == text || "{" == text || "}" == text || "." == text ||
      "*" == text || "/" == text || "%" == text ||
      "+" == text || "-" == text ||
      "<" == text || "<=" == text || ">" == text || ">=" == text ||
      "==" == text || "!=" == text ||
      "," == text;
    if(leftAssociative)
      return Token::ASSOC_LEFT;

    const bool rightAssociative =
      "=" == text || "+=" == text || "-=" == text ||
      "*=" == text || "/=" == text || "%=" == text;
    return rightAssociative ? Token::ASSOC_RIGHT : Token::ASSOC_NONE;
  }

  /** Determine the precedence level of this token from its text.
   * The numeric levels are unchanged from the original table; how two levels
   * are ranked against each other is decided by cmpPrecedence().
   * @return NO_PRECEDENCE if this token is not an operator or its text is not
   *         a recognized operator spelling, otherwise its precedence level.
   */
  size_t precedence() const
  {
    if(!isOperator())
      return Token::NO_PRECEDENCE;

    if("++" == text || "--" == text || "(" == text || ")" == text ||
       "[" == text || "]" == text || "{" == text || "}" == text || "." == text)
      return 2;
    if("==" == text || "!=" == text)
      return 4;
    if("+" == text || "-" == text)
      return 5;
    if("*" == text || "/" == text || "%" == text)
      return 6;
    if("<" == text || "<=" == text || ">" == text || ">=" == text)
      return 8;
    // All assignment spellings share one level, matching associativity().
    if("=" == text || "+=" == text || "-=" == text ||
       "*=" == text || "/=" == text || "%=" == text)
      return 15;
    if("," == text)
      return 17;
    return Token::NO_PRECEDENCE;
  }

  /** Compare the precedence of two tokens.
   * Declared here, defined outside this header.
   * NOTE(review): the meaning of the returned int is not visible from this
   * header - confirm against the definition.
   */
  static int cmpPrecedence(Token t1, Token t2);

  /** Reset values.
   * Clears the text and sets the type back to TYPE_NONE; all other members
   * are left untouched.
   */
  void reset()
  {
    text.clear();
    type = TYPE_NONE;
  }

  /** Operator overloads.
   * Two tokens compare equal when both their text and their type match.
   */
  bool operator==(const Token& rRhs) const
  { return this->text == rRhs.text && this->type == rRhs.type; }
}; //struct Token
/**
 * Free helper functions operating on containers of tokens.
 * Only declarations are given here; the definitions live outside this header.
 */
namespace TokenUtils
{
/** Text-delimited overload of getTokenRange.
 * NOTE(review): behaviour inferred from the signature only - presumably
 * collects into rRetTokens the tokens of srcTokens lying between a token whose
 * text is startTokenText and one whose text is endTokenText; confirm against
 * the definition (including whether the delimiters themselves are included).
 * @param srcTokens        tokens to search (not modified).
 * @param rRetTokens       output container receiving the result.
 * @param searchStartIndex index into srcTokens at which the search starts.
 * @param startTokenText   text identifying the start of the range.
 * @param endTokenText     text identifying the end of the range.
 * @return a bool; presumably true when a range was found - TODO confirm.
 */
bool getTokenRange(const std::vector<std::shared_ptr<Token> >& srcTokens,
std::vector<std::shared_ptr<Token> >& rRetTokens,
size_t searchStartIndex,
const std::string& startTokenText,
const std::string& endTokenText);
/** Type-delimited overload of getTokenRange.
 * NOTE(review): behaviour inferred from the signature only - presumably the
 * same as the text-delimited overload but matching on Token::TokenType;
 * confirm against the definition.
 * @param srcTokens        tokens to search (not modified).
 * @param rRetTokens       output container receiving the result.
 * @param searchStartIndex index into srcTokens at which the search starts.
 * @param startTokenType   token type identifying the start of the range.
 * @param endTokenType     token type identifying the end of the range.
 * @param searchBackwards  defaults to true; presumably selects the search
 *                         direction from searchStartIndex - TODO confirm.
 * @return a bool; presumably true when a range was found - TODO confirm.
 */
bool getTokenRange(const std::vector<std::shared_ptr<Token> >& srcTokens,
std::vector<std::shared_ptr<Token> >& rRetTokens,
size_t searchStartIndex,
Token::TokenType startTokenType,
Token::TokenType endTokenType,
bool searchBackwards = true);
}
#endif //TOKEN_HPP