Exemplo n.º 1
0
/*!	Reads a hexadecimal constant that starts at the current position.

	The caller has already established that the input starts with "0x".
	At least one hex digit must follow the prefix. The resulting constant
	token covers the prefix and all digits and is stored in fCurrentToken.

	\return A reference to fCurrentToken.
	\throws ParseException if the "0x" prefix is not followed by a hex digit.
*/
Token&
Tokenizer::_ParseHexOperand()
{
	const char* const start = fCurrentChar;

	// step over the "0x" prefix
	fCurrentChar += 2;

	if (!_IsHexDigit(*fCurrentChar))
		throw ParseException("expected hex digit", _CurrentPos());

	// the first digit has been verified above; consume it and every
	// hex digit that follows
	do {
		fCurrentChar++;
	} while (_IsHexDigit(*fCurrentChar));

	const int32 tokenLength = fCurrentChar - start;
	fCurrentToken = Token(start, tokenLength, _CurrentPos() - tokenLength,
		TOKEN_CONSTANT);

	const char* text = fCurrentToken.string.String();
	if (tokenLength > 10) {
		// more than "0x" plus eight digits: a 64-bit value is needed
		fCurrentToken.value.SetTo((uint64)strtoull(text, NULL, 16));
	} else {
		// "0x" plus at most eight digits fits into 32 bits
		fCurrentToken.value.SetTo((uint32)strtoul(text, NULL, 16));
	}

	return fCurrentToken;
}
Exemplo n.º 2
0
/*!	Resets the tokenizer so that scanning starts at the beginning of
	\a string.

	\param string The text to tokenize; it is assigned to fString and the
		scan position is set to the start of fString's buffer.
*/
void
Tokenizer::SetTo(const char* string)
{
	// take over the new input first, then point the cursor at it
	fString = string;
	fCurrentChar = fString.String();

	// drop whatever token state was left from the previous input
	fReuseToken = false;
	fCurrentToken = Token();
}
Exemplo n.º 3
0
/*!	Scans the next token from the input and returns it.

	Leading whitespace is skipped. The following token kinds are produced:
	- TOKEN_END_OF_LINE when the terminating NUL is reached. Once this token
	  has been produced every further call returns it again.
	- TOKEN_CONSTANT for decimal integers, floating point numbers and
	  "0x" prefixed hexadecimal numbers.
	- TOKEN_IDENTIFIER for a run of letters, digits and underscores that
	  does not start with a digit.
	- TOKEN_STRING_LITERAL for text enclosed in single or double quotes;
	  a backslash escapes the character following it. An unterminated
	  literal extends to the end of the input.
	- an operator token as recognized by _ParseOperator(), or one of the
	  single character punctuation tokens handled below.

	If fReuseToken is set, it is cleared and the current token is returned
	again without scanning.

	\return A reference to fCurrentToken.
	\throws ParseException on a malformed constant or an unexpected
		character.
*/
const Token&
Tokenizer::NextToken()
{
	// the end of the input is sticky
	if (fCurrentToken.type == TOKEN_END_OF_LINE)
		return fCurrentToken;

	if (fReuseToken) {
		fReuseToken = false;
		return fCurrentToken;
	}

	// Note: the <ctype.h> classification functions are only defined for
	// values representable as unsigned char (and EOF), so every character
	// is cast before being passed to them. Passing a negative plain char
	// (any non-ASCII byte where char is signed) would be undefined.
	while (*fCurrentChar != 0 && isspace((unsigned char)*fCurrentChar))
		fCurrentChar++;

	if (*fCurrentChar == 0) {
		return fCurrentToken = Token("", 0, _CurrentPos(),
			TOKEN_END_OF_LINE);
	}

	bool decimal = *fCurrentChar == '.';

	if (decimal || isdigit((unsigned char)*fCurrentChar)) {
		if (*fCurrentChar == '0' && fCurrentChar[1] == 'x')
			return _ParseHexOperand();

		BString temp;

		const char* begin = fCurrentChar;

		// optional digits before the decimal point
		while (isdigit((unsigned char)*fCurrentChar)) {
			temp << *fCurrentChar;
			fCurrentChar++;
		}

		// optional post decimal part
		// (required if there are no digits before the decimal)
		if (*fCurrentChar == '.') {
			decimal = true;
			temp << '.';
			fCurrentChar++;

			// optional post decimal digits
			while (isdigit((unsigned char)*fCurrentChar)) {
				temp << *fCurrentChar;
				fCurrentChar++;
			}
		}

		int32 length = fCurrentChar - begin;
		if (length == 1 && decimal) {
			// a lone '.' is not a number; rewind and let the operator
			// parser have a look at it
			fCurrentChar = begin;
			if (!_ParseOperator())
				throw ParseException("unexpected character", _CurrentPos());

			return fCurrentToken;
		}

		// Append a sentinel and require sscanf() to match both the number
		// and the sentinel; this verifies that the whole text was consumed
		// as a number. The sentinel is a single character, so it fits
		// into t together with its terminating NUL.
		BString test = temp;
		test << "&_";
		double value;
		char t[2];
		int32 matches = sscanf(test.String(), "%lf&%s", &value, t);
		if (matches != 2)
			throw ParseException("error in constant", _CurrentPos() - length);

		fCurrentToken = Token(begin, length, _CurrentPos() - length,
			TOKEN_CONSTANT);
		if (decimal)
			fCurrentToken.value.SetTo(value);
		else
			fCurrentToken.value.SetTo((int64)strtoll(temp.String(), NULL, 10));
	} else if (isalpha((unsigned char)*fCurrentChar) || *fCurrentChar == '_') {
		const char* begin = fCurrentChar;
		while (*fCurrentChar != 0 && (isalpha((unsigned char)*fCurrentChar)
			|| isdigit((unsigned char)*fCurrentChar)
			|| *fCurrentChar == '_')) {
			fCurrentChar++;
		}
		int32 length = fCurrentChar - begin;
		fCurrentToken = Token(begin, length, _CurrentPos() - length,
			TOKEN_IDENTIFIER);
	} else if (*fCurrentChar == '"' || *fCurrentChar == '\'') {
		// *begin is the opening quote; the literal ends at the next
		// unescaped occurrence of the same quote character
		const char* begin = fCurrentChar++;
		while (*fCurrentChar != 0) {
			if (*fCurrentChar == '\\') {
				// Step over the backslash, then over the escaped character
				// unless the input ends right after the backslash. The
				// pre-increment matters: the character after the backslash
				// must be tested, otherwise a trailing backslash would move
				// the cursor past the terminating NUL.
				if (*(++fCurrentChar) != 0)
					fCurrentChar++;
			} else if (*(fCurrentChar++) == *begin)
				break;
		}
		int32 length = fCurrentChar - begin;
		fCurrentToken = Token(begin, length, _CurrentPos() - length,
			TOKEN_STRING_LITERAL);
	} else {
		if (!_ParseOperator()) {
			int32 type = TOKEN_NONE;
			switch (*fCurrentChar) {
				// NOTE(review): newlines are whitespace and have already
				// been skipped above, so this case cannot be reached here.
				case '\n':
					type = TOKEN_END_OF_LINE;
					break;

				case '(':
					type = TOKEN_OPENING_PAREN;
					break;
				case ')':
					type = TOKEN_CLOSING_PAREN;
					break;

				case '[':
					type = TOKEN_OPENING_SQUARE_BRACKET;
					break;
				case ']':
					type = TOKEN_CLOSING_SQUARE_BRACKET;
					break;

				case '{':
					type = TOKEN_OPENING_CURLY_BRACE;
					break;
				case '}':
					type = TOKEN_CLOSING_CURLY_BRACE;
					break;

				case '\\':
					type = TOKEN_BACKSLASH;
					break;

				case ':':
					type = TOKEN_COLON;
					break;

				case ';':
					type = TOKEN_SEMICOLON;
					break;

				case ',':
					type = TOKEN_COMMA;
					break;

				// NOTE(review): a '.' never gets here either; it starts the
				// number branch above, which hands a lone '.' to
				// _ParseOperator().
				case '.':
					type = TOKEN_PERIOD;
					break;

				case '#':
					type = TOKEN_POUND;
					break;

				default:
					throw ParseException("unexpected character",
						_CurrentPos());
			}
			fCurrentToken = Token(fCurrentChar, 1, _CurrentPos(),
				type);
			fCurrentChar++;
		}
	}

	return fCurrentToken;
}
Exemplo n.º 4
0
bool
Tokenizer::_ParseOperator()
{
	int32 type = TOKEN_NONE;
	int32 length = 0;
	switch (*fCurrentChar) {
		case '+':
			type = TOKEN_PLUS;
			length = 1;
			break;

		case '-':
			 if (_Peek() == '>') {
			 	type = TOKEN_MEMBER_PTR;
			 	length = 2;
			 } else {
				type = TOKEN_MINUS;
				length = 1;
			 }
			break;

		case '*':
			switch (_Peek()) {
				case '/':
					type = TOKEN_END_COMMENT_BLOCK;
					length = 2;
					break;
				default:
					type = TOKEN_STAR;
					length = 1;
					break;
			}
			break;

		case '/':
			switch (_Peek()) {
				case '*':
					type = TOKEN_BEGIN_COMMENT_BLOCK;
					length = 2;
					break;
				case '/':
					type = TOKEN_INLINE_COMMENT;
					length = 2;
					break;
				default:
					type = TOKEN_SLASH;
					length = 1;
					break;
			}
			break;

		case '%':
			type = TOKEN_MODULO;
			length = 1;
			break;

		case '^':
			type = TOKEN_BITWISE_XOR;
			length = 1;
			break;

		case '&':
			if (_Peek() == '&') {
			 	type = TOKEN_LOGICAL_AND;
			 	length = 2;
			} else {
				type = TOKEN_BITWISE_AND;
				length = 1;
			}
			break;

		case '|':
			if (_Peek() == '|') {
				type = TOKEN_LOGICAL_OR;
				length = 2;
			} else {
				type = TOKEN_BITWISE_OR;
				length = 1;
			}
			break;

		case '!':
			if (_Peek() == '=') {
				type = TOKEN_NE;
				length = 2;
			} else {
				type = TOKEN_LOGICAL_NOT;
				length = 1;
			}
			break;

		case '=':
			if (_Peek() == '=') {
				type = TOKEN_EQ;
				length = 2;
			} else {
				type = TOKEN_ASSIGN;
				length = 1;
			}
			break;

		case '>':
			if (_Peek() == '=') {
				type = TOKEN_GE;
				length = 2;
			} else {
				type = TOKEN_GT;
				length = 1;
			}
			break;

		case '<':
			if (_Peek() == '=') {
				type = TOKEN_LE;
				length = 2;
			} else {
				type = TOKEN_LT;
				length = 1;
			}
			break;

		case '~':
			type = TOKEN_BITWISE_NOT;
			length = 1;
			break;


		case '?':
			type = TOKEN_CONDITION;
			length = 1;
			break;

		case '.':
			type = TOKEN_MEMBER_PTR;
			length = 1;
			break;

		default:
			break;
	}

	if (length == 0)
		return false;

	fCurrentToken = Token(fCurrentChar, length, _CurrentPos(), type);
	fCurrentChar += length;

	return true;
}