Example #1
0
const vector<StringSymbol> Parser::tokenize(const string& _str)
{
	vector<StringSymbol> tokens;
	string substr;
	int32_t whitespace_position;
	int32_t word_end;
	Symbol symbol;

	for (uint32_t i = 0; i < _str.size();) {
		whitespace_position = _str.find_first_not_of(" ", i);

		if (_str[whitespace_position] == ';') {
			substr = _str.substr(i, _str.size() - i);
			tokens.push_back(StringSymbol(substr, Symbol::TS_COMMENT));
			break;
		}

		word_end = _str.find_first_of(" ", whitespace_position);
		symbol = lexer(_str.substr(whitespace_position, word_end - whitespace_position));

		if (symbol == Symbol::TS_LABEL) {  // Hack para Symbol::TS_MARKER con : separados
			uint32_t tp_start;
			uint32_t tp_end;
			string label;

			label = _str.substr(whitespace_position, word_end - whitespace_position);
			trim_left(label);
			trim_right(label);

			tp_start = _str.find_first_not_of(" ", word_end);
			tp_end = _str.find_first_of(" ", tp_start);

			if (tp_start >= 0 && tp_start < _str.size() && tp_end >= 0 && tp_end < _str.size()) {
				substr = _str.substr(tp_start, tp_end - tp_start);

				if (lexer(substr) == Symbol::TS_TP) {
					tokens.push_back(StringSymbol(label + ":", Symbol::TS_MARKER));
					i = tp_end;
					continue;
				}
			}
		}

		if (symbol == Symbol::NONE) {
			cout << "ERROR: " << _str.substr(whitespace_position, word_end - whitespace_position) <<
			     " no corresponde a ningún símbolo válido" << endl;
		}

		substr = _str.substr(whitespace_position, word_end - whitespace_position);
		trim_right(substr);
		trim_left(substr);
		tokens.push_back(StringSymbol(substr, symbol));
		i = word_end;
	}

	tokens.push_back(StringSymbol("", Symbol::TS_EOL));

	return tokens;
}