Example #1
0
void RuleParser::parse() {
	sweet::jsonparser jp(str);

	std::string tokStr("Token");
	if(jp.getRoot()->pathExists(tokStr)) {
		auto tok = jp.getRoot()->access(tokStr);
		auto tokType = tok->getType();
		ASSERT_EQ(tokType, sweet::value::type_array);

		for(auto& it : tok->getArray()) {
			std::string name = it->getObject()->access("Name")->getString();
			std::string regex = it->getObject()->access("Regex")->getString();
			std::string convertFunc;
			if(it->getObject()->pathExists("ConvertFunction")) {
				convertFunc = it->getObject()->access("ConvertFunction")->getString();
			}
			token.insert(std::make_pair(name,
				Token(name, regex, convertFunc)
			));
		}
	}

	std::string ruleStr("Rules");
	if(jp.getRoot()->pathExists(ruleStr)) {
		auto rls = jp.getRoot()->access(ruleStr);
		auto rlsType = rls->getType();
		ASSERT_EQ(rlsType, sweet::value::type_array);
		for(auto& it : rls->getArray()) {

			std::string ruleName = it->getObject()->access("Name")->getString();
			auto& entry = it->getObject()->access("Expression")->getArray();
			for(auto& jt : entry) {
				RuleVector rv;
				auto semiSplit = split(jt->getObject()->access("Rule")->getString(), ';');
				std::string ruleEnd = jt->getObject()->get<std::string>("Id", "");
				for(auto& ht : semiSplit) {
					std::string type = trim(ht.substr(0, ht.find('(')));
					size_t lParen = ht.find('(');
					size_t rParen = ht.find(')');
					std::string saveName;
					if(lParen != std::string::npos && rParen != std::string::npos) {
						saveName = trim(ht.substr(lParen+1, rParen), "\t ()");
					}

					rv.push_back(RulePart(type, saveName, ruleEnd));
				}

				ruleMap.insert(std::make_pair(ruleName, Expr(rv)));
			}

		}
	}
}
Example #2
0
void RE2NFA::tokenize(const QString &input)
{
    symbols.clear();
#if 1
    RegExpTokenizer tokenizer(input);
    Symbol sym;
    int tok = tokenizer.lex();
    while (tok != -1) {
        Symbol sym;
        sym.token = static_cast<Token>(tok);
        sym.lexem = input.mid(tokenizer.lexemStart, tokenizer.lexemLength);

        if (sym.token == TOK_QUOTED_STRING) {
            sym.lexem.chop(1);
            sym.lexem.remove(0, 1);
            sym.token = TOK_STRING;
        }

        if (sym.token == TOK_STRING || sym.token == TOK_SEQUENCE) {
            for (int i = 0; i < sym.lexem.length(); ++i) {
                if (sym.lexem.at(i) == '\\') {
                    if (i >= sym.lexem.length() - 1)
                        break;
                    QChar ch = sym.lexem.at(i + 1);
                    if (ch == QLatin1Char('n')) {
                        ch = '\n';
                    } else if (ch == QLatin1Char('r')) {
                        ch = '\r';
                    } else if (ch == QLatin1Char('t')) {
                        ch = '\t';
                    } else if (ch == QLatin1Char('f')) {
                        ch = '\f';
                    }
                    sym.lexem.replace(i, 2, ch);
                }
            }
        }

        /*
        if (sym.token == TOK_SEQUENCE) {
            Symbol s;
            s.token = TOK_LBRACKET;
            s.lexem = "[";
            symbols.append(s);

            for (int i = 1; i < sym.lexem.length() - 1; ++i) {
                s.token = TOK_STRING;
                s.lexem = sym.lexem.at(i);
                symbols.append(s);
            }

            s.token = TOK_RBRACKET;
            s.lexem = "]";
            symbols.append(s);

            tok = tokenizer.lex();
            continue;
        }
        */

        symbols.append(sym);
        tok = tokenizer.lex();
    }
#else
    int pos = 0;
    bool insideSet = false;
    while (pos < input.length()) {
        QChar ch = input.at(pos);

        Symbol sym;
        sym.column = pos;
        sym.token = TOK_INVALID;
        sym.lexem = QString(ch);
        switch (ch.toLatin1()) {
            case '"': {
                if (insideSet) {
                    sym.token = TOK_STRING;
                    sym.lexem = QString(ch);
                    symbols += sym;
                    ++pos;
                    continue;
                }
                if (pos + 1 >= input.length())
                    return;
                int quoteEnd = skipQuote(input, pos + 1);
                sym.token = TOK_STRING;
                sym.lexem = input.mid(pos + 1, quoteEnd - pos - 2);
                symbols += sym;
                pos = quoteEnd;
                continue;
            }
            case '{':
                sym.token = (insideSet ? TOK_STRING : TOK_LBRACE);
                break;
            case '}':
                sym.token = (insideSet ? TOK_STRING : TOK_RBRACE);
                break;
            case '[':
                insideSet = true;
                sym.token = TOK_LBRACKET;
                break;
            case ']':
                insideSet = false;
                sym.token = TOK_RBRACKET;
                break;
            case '(':
                sym.token = (insideSet ? TOK_STRING : TOK_LPAREN);
                break;
            case ')':
                sym.token = (insideSet ? TOK_STRING : TOK_RPAREN);
                break;
            case ',':
                sym.token = (insideSet ? TOK_STRING : TOK_COMMA);
                break;
            case '*':
                sym.token = (insideSet ? TOK_STRING : TOK_STAR);
                break;
            case '|':
                sym.token = (insideSet ? TOK_STRING : TOK_OR);
                break;
            case '?':
                sym.token = (insideSet ? TOK_STRING : TOK_QUESTION);
                break;
            case '.':
                sym.token = (insideSet ? TOK_STRING : TOK_DOT);
                break;
            case '+':
                sym.token = (insideSet ? TOK_STRING : TOK_PLUS);
                break;
            case '\\':
                ++pos;
                if (pos >= input.length())
                    return;
                ch = input.at(pos);
                if (ch == QLatin1Char('n')) {
                    ch = '\n';
                } else if (ch == QLatin1Char('r')) {
                    ch = '\r';
                } else if (ch == QLatin1Char('t')) {
                    ch = '\t';
                } else if (ch == QLatin1Char('f')) {
                    ch = '\f';
                }
                // fall through
            default:
                sym.token = TOK_STRING;
                sym.lexem = QString(ch);
                symbols += sym;
                ++pos;
                continue;
        }
        symbols += sym;
        ++pos;
    }
#endif
#if 0
    foreach (Symbol s, symbols) {
        qDebug() << "Tok" << tokStr(s.token) << "lexem" << s.lexem;
    }
Example #3
0
bool ParseMeal(std::ifstream & infile, Meal & meal)
{
	if (true == infile.eof())
	{
		return false;
	}

	const int MAX_TOKEN = 256;
	std::string line;
	std::string tokenBuf(MAX_TOKEN, '\0');

	const char * delim = ",\n";
	
	do
	{
		// read line
		std::getline(infile, line);

		// grab first token
		char * token = nullptr;
		char * nextToken = nullptr;
		token = strtok_s(&line[0], delim, &nextToken);

		bool mealTypeParsed = false;
		bool caloriesParsed = false;

		// locals for meal header row
		MealType mealType = MealType::MEAL_BREAKFAST;
		float calories;

		// walk the line
		while (token != nullptr && nextToken != nullptr)
		{
			// valid meal type?
			std::string tokTrimmed = token;
			trim_str(tokTrimmed);

			String tokStr(ConvertStringToWString(tokTrimmed));

			if (true == StringToMealType(tokStr, mealType))
			{
				mealTypeParsed = true;
			}
			else if (true == mealTypeParsed)
			{
				calories = (float)atof(token);

				// if calories column is empty or whitespace, or an invalid number, skip this meal
				if (calories > 0)
				{
					caloriesParsed = true;
					break;
				}
			}

			// next token plz
			token = strtok_s(nullptr, delim, &nextToken);
		}

		// if we didn't parse a meal line, fail
		if (false == mealTypeParsed || false == caloriesParsed)
		{
			// try the next line
			continue;
		}

		assert(true == IsValidMealType(mealType) && calories > 0);

		// parse as many food items are below the meal line
		Meal workingMeal(mealType, calories);

		FoodItem foodItem;
		while (false == infile.eof() && true == ParseFoodItem(infile, foodItem))
		{
			workingMeal.AddFoodItem(foodItem);

			// make sure this line isn't starting a new meal
			std::string peekFirstToken;
			if (false == PeekString(infile, delim, peekFirstToken))
			{
				// couldn't read a token, done with this meal
				return true;
			}

			trim_str(peekFirstToken);
			String strFirstToken(ConvertStringToWString(peekFirstToken));
			MealType nextTokMealType;
			if (true == StringToMealType(strFirstToken, nextTokMealType))
			{
				// next token is a meal type, so we're done with this meal
				break;
			}
		}

		// meal is valid
		meal = workingMeal;
		
		break;
	} 
	while (false == infile.eof());

	return true;
}