/// Returns the part of `str` that precedes the first occurrence of
/// `delimiter0` found outside a quoted literal and not escaped by a backslash.
/// If no such occurrence exists, the whole of `str` is returned.
///
/// @param str         text to scan from its first character.
/// @param delimiter0  character that terminates the scan.
/// @return            prefix of `str` up to (not including) the delimiter.
///
/// Note: the quote test comes first, so a delimiter of ' or " is never matched.
std::string Parse::ParseToDelimiter(const std::string& str, char delimiter0)
{
    for (size_t i = 0; i < str.size(); ++i)
    {
        const char ch = str[i];

        if (ch == '\'' || ch == '\"')
        {
            // Skip over the literal: land on its last character so that the
            // loop increment steps past it. The distance is taken from the
            // size of whatever ParseStringLiteral returns for the remainder.
            const size_t literalLength = ParseStringLiteral(str.substr(i)).size();
            if (literalLength > 1)
                i += literalLength - 1;
        }
        //if we come to a '\', ignore it and the next character
        else if (ch == '\\')
        {
            ++i;
        }
        else if (ch == delimiter0)
        {
            return str.substr(0, i);
        }
    }

    // No delimiter found: the entire input is the result.
    return str;
}
bool CScanner::NextToken() { SkipWhiteSpace(); if (ErrorString != "") return false; // if (Pos >= SourceLength) // return false; _off_t OldPos = Pos; char c = cget(); if (isdigit(c) || // Thinks a little, while reading this condition, it might be a bit unreadable. ( ( c == '.' && ( (IncPos(), isdigit(cget())) || (DecPos(), false) ) ) && (DecPos(), true) ) ) return ParseNumber() && OldPos != Pos; else if (isalpha(c) || c == '_') return ParseIdentifier() && OldPos != Pos; else if (MatchPrefix("'")) return ParseCharacterLiteral() && OldPos != Pos; else if (MatchPrefix("\"")) return ParseStringLiteral() && OldPos != Pos; else if (SymbolicTokenMapping.find(string() += c)) return ParseSymbol() && OldPos != Pos; else if (c == 0) return true; else ErrorString = "Unrecognized character at line: ";// + itoa(CurrentLine) + ", Column: " + itoa(CurrentColumn); return false; }
// Reads one token starting at the current position.
// Leading space is consumed first. Returns false when the input is exhausted
// or when the current character does not start any known token kind;
// otherwise the matching Parse* routine is invoked and true is returned.
bool Read()
{
    EatSpace();

    // Nothing left to read.
    if (!(index < length))
        return false;

    char ch = Current();

    if (IsAlpha(ch) || ch == '_')
    {
        ParseIdentifier();
        return true;
    }
    if (IsDigit(ch))
    {
        ParseIntegerLiteral();
        return true;
    }
    if (ch == '"')
    {
        ParseStringLiteral();
        return true;
    }
    if (ch == '\'')
    {
        ParseCodePointLiteral();
        return true;
    }
    if (IsSymbol(ch))
    {
        ParseSymbols();
        return true;
    }

    // Character does not begin any token kind handled here.
    return false;
}
// *cur_ == '{' JsonValue * JsonParser::ReadObject(JsonValue * parent) { std::auto_ptr<JsonObjectValue> object(new JsonObjectValue(parent)); std::string key; Advance(); SkipSpaces(); if (*cur_ == '}') { Advance(); return object.release(); } while (true) { if (*cur_ == '"') { ParseStringLiteral(key); } else { RaiseError("Unexpected symbol, while waiting key of object"); } SkipSpaces(); if (*cur_ != ':') { RaiseError("Colon expected"); } Advance(); SkipSpaces(); object->Set(key, ReadValue(parent)); SkipSpaces(); if (*cur_ == ',') { Advance(); SkipSpaces(); } else if (*cur_ == '}') { Advance(); return object.release(); } else { RaiseError("Comma or end of object expected"); } } }
std::string Parse::ParseToMatchingParen(const std::string& str, char startParen, char stopParen) { std::string ret = ""; //to match parens int parens = 0; size_t len = 0; for (size_t i = 0; i < str.size(); i++) { len = i; if (str[i] == startParen) parens++; else if(str[i] == stopParen) parens--; if (parens == 0) break; //if we come to a string, parse it to ignore it if (str[i] == '\"' || str[i] == '\'') { int consumeLiteral = ParseStringLiteral(str.substr(i)).size() - 1; if (consumeLiteral > 0) i += consumeLiteral; //again, if i is more than mExpression's size, something is wrong with the string literal if (i >= str.size()) { len = i; break; } } //if we come to a '\', ignore it and the next character else if (str[i] == '\\') { i+=1; } } ret = str.substr(1, len - 1); return ret; }
/// Extracts the next token from `text` into `out`.
///
/// Leading blanks, tabs and newlines are skipped first. Then, depending on the
/// first character, the work is delegated to ParseComment, ParseStringLiteral,
/// ParseCharLiteral or ParseStatement, or - for characters flagged in
/// BreakerIndex - a 1, 2 or 3 character operator token is produced directly.
///
/// @param out  receives the token range; `out.str` marks the start and
///             `out.end` the end for operator tokens produced here.
/// @return     the delegated parser's result, true for an operator token,
///             false when the end of the buffer is reached.
bool Parser::GetToken(token& out)
{
    // trim leading whitespace first:
    for (; text.str < text.end; ++text.str)
    {
        char ch = *text.str;
        if (ch == ' ' || ch == '\t')
            continue;
        if (ch == '\r' || ch == '\n')
            SkipNewLine(ch);
        else
            break; // not ' ' or '\t', so finish trim
    }

    out.end = out.str = text.str; // mark down start and default end
    column = GetColumn();

    for (; text.str < text.end; ++text.str)
    {
        char ch = *text.str;
        if (ch == '\r' || ch == '\n')
        {
            SkipNewLine(ch);
            continue;
        }

        if (ch == '/') // possible comment start?
        {
            // Look ahead only if a second character exists inside the buffer;
            // '\0' stands in for "nothing there" and matches no test below.
            const char next = (text.end - text.str > 1) ? text.str[1] : '\0';
            if (next == '/' || next == '*') // line comment '//' or block comment '/*'
                return ParseComment(out, next == '*'); // parse the comment
        }
        else if (ch == '"')
            return ParseStringLiteral(out); // skip the literal
        else if (ch == '\'')
            return ParseCharLiteral(out);

        // Index through unsigned char so bytes >= 0x80 cannot produce a
        // negative subscript (assumes the table covers 0..255 - TODO confirm).
        if (BreakerIndex[static_cast<unsigned char>(ch)] == 1) // token starts with a breaker char
        {
            line = cur_line;
            column = GetColumn();

            ++text.str; // the first character is now consumed

            // Bounds-guarded lookahead for the 2nd and 3rd operator characters.
            const char c2 = (text.str < text.end) ? text.str[0] : '\0';
            const char c3 = (text.end - text.str > 1) ? text.str[1] : '\0';

            const bool threeCharOp =
                c3 == '=' &&
                ((ch == '<' && c2 == '<') ||  // <<=
                 (ch == '>' && c2 == '>'));   // >>=

            const bool assignOp =
                c2 == '=' &&
                (ch == '<' ||  // <=
                 ch == '>' ||  // >=
                 ch == '=' ||  // ==
                 ch == '!' ||  // !=
                 ch == '+' ||  // +=
                 ch == '-' ||  // -=
                 ch == '*' ||  // *=
                 ch == '/' ||  // /=
                 ch == '%' ||  // %=
                 ch == '&' ||  // &=
                 ch == '|' ||  // |=
                 ch == '^');   // ^=

            const bool pairOp =
                (ch == '+' && c2 == '+') ||  // ++
                (ch == '-' && c2 == '-') ||  // --
                (ch == '-' && c2 == '>') ||  // ->
                (ch == '?' && c2 == '?') ||  // ??
                (ch == '&' && c2 == '&') ||  // &&
                (ch == '|' && c2 == '|') ||  // ||
                (ch == '<' && c2 == '<') ||  // <<
                (ch == '>' && c2 == '>') ||  // >>
                (ch == ':' && c2 == ':');    // ::

            if (threeCharOp)
            {
                text.str += 2; // 3 char token parsed
            }
            else if (assignOp || pairOp)
            {
                ++text.str; // 2 char token parsed
            }
            // otherwise: 1 char token parsed

            out.end = text.str;
            return true;
        }

        // so this is a literal or a statement?
        return ParseStatement(out);
    }

    return false; // end of buffer
}
char* Preprocessor::ParseLexem( char* start, char* end, Lexem& out ) { if( start == end ) return start; char current_char = *start; if( IsTrivial( current_char ) ) { out.Value += current_char; out.Type = TrivialTypes[Trivials.find_first_of( current_char )]; return ++start; } if( IsIdentifierStart( current_char ) ) return ParseIdentifier( start, end, out ); if( current_char == '#' ) { out.Value = "#"; ++start; if( *start == '#' ) { out.Value = "##"; out.Type = Lexem::IGNORE; return ( ++start ); } while( start != end && ( *start == ' ' || *start == '\t' ) ) ++start; if( start != end && IsIdentifierStart( *start ) ) start = ParseIdentifier( start, end, out ); out.Type = Lexem::PREPROCESSOR; return start; } if( IsNumber( current_char ) ) return ParseNumber( start, end, out ); if( current_char == '\"' ) return ParseStringLiteral( start, end, '\"', out ); if( current_char == '\'' ) return ParseStringLiteral( start, end, '\'', out ); // Todo: set optional ParseCharacterLiteral? if( current_char == '/' ) { // Need to see if it's a comment. ++start; if( start == end ) return start; if( *start == '*' ) return ParseBlockComment( start, end, out ); if( *start == '/' ) return ParseLineComment( start, end, out ); // Not a comment - let default code catch it as MISC --start; } if( current_char == '\\' ) { out.Type = Lexem::BACKSLASH; return ++start; } out.Value = std::string( 1, current_char ); out.Type = Lexem::IGNORE; return ++start; }
// *cur == '"' JsonValue * JsonParser::ReadString(JsonValue * parent) { std::string value; ParseStringLiteral(value); return new JsonStringValue(parent, value); }