void RuleParser::parse() { sweet::jsonparser jp(str); std::string tokStr("Token"); if(jp.getRoot()->pathExists(tokStr)) { auto tok = jp.getRoot()->access(tokStr); auto tokType = tok->getType(); ASSERT_EQ(tokType, sweet::value::type_array); for(auto& it : tok->getArray()) { std::string name = it->getObject()->access("Name")->getString(); std::string regex = it->getObject()->access("Regex")->getString(); std::string convertFunc; if(it->getObject()->pathExists("ConvertFunction")) { convertFunc = it->getObject()->access("ConvertFunction")->getString(); } token.insert(std::make_pair(name, Token(name, regex, convertFunc) )); } } std::string ruleStr("Rules"); if(jp.getRoot()->pathExists(ruleStr)) { auto rls = jp.getRoot()->access(ruleStr); auto rlsType = rls->getType(); ASSERT_EQ(rlsType, sweet::value::type_array); for(auto& it : rls->getArray()) { std::string ruleName = it->getObject()->access("Name")->getString(); auto& entry = it->getObject()->access("Expression")->getArray(); for(auto& jt : entry) { RuleVector rv; auto semiSplit = split(jt->getObject()->access("Rule")->getString(), ';'); std::string ruleEnd = jt->getObject()->get<std::string>("Id", ""); for(auto& ht : semiSplit) { std::string type = trim(ht.substr(0, ht.find('('))); size_t lParen = ht.find('('); size_t rParen = ht.find(')'); std::string saveName; if(lParen != std::string::npos && rParen != std::string::npos) { saveName = trim(ht.substr(lParen+1, rParen), "\t ()"); } rv.push_back(RulePart(type, saveName, ruleEnd)); } ruleMap.insert(std::make_pair(ruleName, Expr(rv))); } } } }
// Lexes the regular-expression text `input` into the member list `symbols`.
// Any previous contents of `symbols` are discarded.
// NOTE(review): only the `#if 1` branch is compiled; the `#else` branch is a
// retained hand-written tokenizer and the trailing `#if 0` block is debug
// output. The closing `#endif`/`}` of this function lie outside this chunk.
void RE2NFA::tokenize(const QString &input) {
	symbols.clear();
#if 1
	// Active path: delegate lexing to RegExpTokenizer and post-process lexems.
	RegExpTokenizer tokenizer(input);
	Symbol sym; // NOTE(review): shadowed by the loop-local `sym` below; appears unused.
	int tok = tokenizer.lex();
	while (tok != -1) {
		Symbol sym;
		sym.token = static_cast<Token>(tok);
		sym.lexem = input.mid(tokenizer.lexemStart, tokenizer.lexemLength);
		// Quoted strings: drop the surrounding quotes and retype as plain string.
		if (sym.token == TOK_QUOTED_STRING) {
			sym.lexem.chop(1);
			sym.lexem.remove(0, 1);
			sym.token = TOK_STRING;
		}
		// Resolve backslash escapes (\n \r \t \f) in-place inside the lexem.
		// Other escaped characters keep the literal character after the '\'.
		if (sym.token == TOK_STRING || sym.token == TOK_SEQUENCE) {
			for (int i = 0; i < sym.lexem.length(); ++i) {
				if (sym.lexem.at(i) == '\\') {
					// A trailing lone backslash has nothing to escape.
					if (i >= sym.lexem.length() - 1) break;
					QChar ch = sym.lexem.at(i + 1);
					if (ch == QLatin1Char('n')) { ch = '\n'; }
					else if (ch == QLatin1Char('r')) { ch = '\r'; }
					else if (ch == QLatin1Char('t')) { ch = '\t'; }
					else if (ch == QLatin1Char('f')) { ch = '\f'; }
					// Replace the two-char escape with the single resolved char.
					sym.lexem.replace(i, 2, ch);
				}
			}
		}
		// Disabled: expansion of TOK_SEQUENCE into bracketed single-char strings.
		/* if (sym.token == TOK_SEQUENCE) { Symbol s; s.token = TOK_LBRACKET; s.lexem = "["; symbols.append(s); for (int i = 1; i < sym.lexem.length() - 1; ++i) { s.token = TOK_STRING; s.lexem = sym.lexem.at(i); symbols.append(s); } s.token = TOK_RBRACKET; s.lexem = "]"; symbols.append(s); tok = tokenizer.lex(); continue; } */
		symbols.append(sym);
		tok = tokenizer.lex();
	}
#else
	// Inactive path: hand-rolled character scanner. `insideSet` demotes most
	// metacharacters to plain TOK_STRING while between '[' and ']'.
	int pos = 0;
	bool insideSet = false;
	while (pos < input.length()) {
		QChar ch = input.at(pos);
		Symbol sym;
		sym.column = pos;
		sym.token = TOK_INVALID;
		sym.lexem = QString(ch);
		switch (ch.toLatin1()) {
			case '"': {
				// Inside a set a quote is a literal character.
				if (insideSet) {
					sym.token = TOK_STRING;
					sym.lexem = QString(ch);
					symbols += sym;
					++pos;
					continue;
				}
				// Unterminated quote at end of input: abort tokenizing.
				if (pos + 1 >= input.length())
					return;
				int quoteEnd = skipQuote(input, pos + 1);
				sym.token = TOK_STRING;
				// Lexem is the text between the quotes, quotes excluded.
				sym.lexem = input.mid(pos + 1, quoteEnd - pos - 2);
				symbols += sym;
				pos = quoteEnd;
				continue;
			}
			case '{': sym.token = (insideSet ? TOK_STRING : TOK_LBRACE); break;
			case '}': sym.token = (insideSet ? TOK_STRING : TOK_RBRACE); break;
			case '[': insideSet = true; sym.token = TOK_LBRACKET; break;
			case ']': insideSet = false; sym.token = TOK_RBRACKET; break;
			case '(': sym.token = (insideSet ? TOK_STRING : TOK_LPAREN); break;
			case ')': sym.token = (insideSet ? TOK_STRING : TOK_RPAREN); break;
			case ',': sym.token = (insideSet ? TOK_STRING : TOK_COMMA); break;
			case '*': sym.token = (insideSet ? TOK_STRING : TOK_STAR); break;
			case '|': sym.token = (insideSet ? TOK_STRING : TOK_OR); break;
			case '?': sym.token = (insideSet ? TOK_STRING : TOK_QUESTION); break;
			case '.': sym.token = (insideSet ? TOK_STRING : TOK_DOT); break;
			case '+': sym.token = (insideSet ? TOK_STRING : TOK_PLUS); break;
			case '\\':
				// Escape: consume the backslash and translate \n \r \t \f.
				++pos;
				if (pos >= input.length())
					return;
				ch = input.at(pos);
				if (ch == QLatin1Char('n')) { ch = '\n'; }
				else if (ch == QLatin1Char('r')) { ch = '\r'; }
				else if (ch == QLatin1Char('t')) { ch = '\t'; }
				else if (ch == QLatin1Char('f')) { ch = '\f'; }
				// fall through
			default:
				sym.token = TOK_STRING;
				sym.lexem = QString(ch);
				symbols += sym;
				++pos;
				continue;
		}
		symbols += sym;
		++pos;
	}
#endif
#if 0
	// Debug dump of the produced symbol stream.
	foreach (Symbol s, symbols) {
		qDebug() << "Tok" << tokStr(s.token) << "lexem" << s.lexem;
	}
// Reads lines from `infile` until it finds a meal header row — a comma-
// separated line containing a meal-type token followed by a positive calorie
// count — then consumes the food-item rows below it into `meal`.
// Returns false only when `infile` is already at EOF on entry; otherwise
// returns true, even if no meal header was found before EOF, in which case
// `meal` is left unmodified. NOTE(review): callers presumably distinguish
// those cases some other way — confirm the no-meal-found return is intended.
bool ParseMeal(std::ifstream & infile, Meal & meal) {
	if (true == infile.eof()) {
		return false;
	}
	const int MAX_TOKEN = 256;
	std::string line;
	// NOTE(review): tokenBuf is never read or written below — looks vestigial.
	std::string tokenBuf(MAX_TOKEN, '\0');
	// Tokens are separated by commas; '\n' guards against a stray newline.
	const char * delim = ",\n";
	do {
		// read line
		std::getline(infile, line);
		// grab first token — strtok_s mutates `line` in place via &line[0]
		char * token = nullptr;
		char * nextToken = nullptr;
		token = strtok_s(&line[0], delim, &nextToken);
		bool mealTypeParsed = false;
		bool caloriesParsed = false;
		// locals for meal header row
		MealType mealType = MealType::MEAL_BREAKFAST;
		// `calories` stays uninitialized until the header row is parsed; it is
		// only read after caloriesParsed is set (guarded below).
		float calories;
		// walk the line
		while (token != nullptr && nextToken != nullptr) {
			// valid meal type?
			std::string tokTrimmed = token;
			trim_str(tokTrimmed);
			String tokStr(ConvertStringToWString(tokTrimmed));
			if (true == StringToMealType(tokStr, mealType)) {
				mealTypeParsed = true;
			} else if (true == mealTypeParsed) {
				// Token after the meal type must be the calorie count.
				calories = (float)atof(token);
				// if calories column is empty or whitespace, or an invalid number, skip this meal
				if (calories > 0) {
					caloriesParsed = true;
					break;
				}
			}
			// next token plz
			token = strtok_s(nullptr, delim, &nextToken);
		}
		// if we didn't parse a meal line, fail
		if (false == mealTypeParsed || false == caloriesParsed) {
			// try the next line
			continue;
		}
		assert(true == IsValidMealType(mealType) && calories > 0);
		// parse as many food items are below the meal line
		Meal workingMeal(mealType, calories);
		FoodItem foodItem;
		while (false == infile.eof() && true == ParseFoodItem(infile, foodItem)) {
			workingMeal.AddFoodItem(foodItem);
			// make sure this line isn't starting a new meal
			// NOTE(review): PeekString presumably inspects the next token without
			// consuming the stream — verify against its definition.
			std::string peekFirstToken;
			if (false == PeekString(infile, delim, peekFirstToken)) {
				// couldn't read a token, done with this meal
				return true;
			}
			trim_str(peekFirstToken);
			String strFirstToken(ConvertStringToWString(peekFirstToken));
			MealType nextTokMealType;
			if (true == StringToMealType(strFirstToken, nextTokMealType)) {
				// next token is a meal type, so we're done with this meal
				break;
			}
		}
		// meal is valid
		meal = workingMeal;
		break;
	} while (false == infile.eof());
	return true;
}