void Preprocessor::parseEscapes(std::string& s) { parseEscape(s, "\\n", "\n"); parseEscape(s, "\\s", " "); parseEscape(s, "\\t", "\t"); parseEscape(s, "\\v", "\v"); }
std::string tokenFromStream( T& in ) { fc::stringstream token; try { char c = in.peek(); while( true ) { switch( c = in.peek() ) { case '\\': token << parseEscape( in ); break; case '\t': case ' ': case ',': case ':': case '\0': case '\n': case '\x04': in.get(); return token.str(); case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '_': case '-': case '.': case '+': case '/': token << c; in.get(); break; default: return token.str(); } } return token.str(); } catch( const fc::eof_exception& eof ) { return token.str(); } catch (const std::ios_base::failure&) { return token.str(); } FC_RETHROW_EXCEPTIONS( warn, "while parsing token '${token}'", ("token", token.str() ) ); }
std::string stringFromToken( T& in ) { fc::stringstream token; try { char c = in.peek(); while( true ) { switch( c = in.peek() ) { case '\\': token << parseEscape( in ); break; case '\t': case ' ': case '\0': case '\n': in.get(); return token.str(); default: if( isalnum( c ) || c == '_' || c == '-' || c == '.' || c == ':' || c == '/' ) { token << c; in.get(); } else return token.str(); } } return token.str(); } catch( const fc::eof_exception& eof ) { return token.str(); } catch (const std::ios_base::failure&) { return token.str(); } FC_RETHROW_EXCEPTIONS( warn, "while parsing token '${token}'", ("token", token.str() ) ); }
std::string stringFromStream( T& in ) { fc::stringstream token; try { char c = in.peek(); if( c != '"' ) FC_THROW_EXCEPTION( parse_error_exception, "Expected '\"' but read '${char}'", ("char", string(&c, (&c) + 1) ) ); in.get(); while( true ) { switch( c = in.peek() ) { case '\\': token << parseEscape( in ); break; case 0x04: FC_THROW_EXCEPTION( parse_error_exception, "EOF before closing '\"' in string '${token}'", ("token", token.str() ) ); case '"': in.get(); return token.str(); default: token << c; in.get(); } } FC_THROW_EXCEPTION( parse_error_exception, "EOF before closing '\"' in string '${token}'", ("token", token.str() ) ); } FC_RETHROW_EXCEPTIONS( warn, "while parsing token '${token}'", ("token", token.str() ) ); }
std::string quoteStringFromStream( T& in ) { fc::stringstream token; try { char q = in.get(); switch( q ) { case '\'': if( strict ) FC_THROW_EXCEPTION( parse_error_exception, "expected: '\"' at beginning of string, got '\''" ); // falls through case '"': break; default: if( strict ) FC_THROW_EXCEPTION( parse_error_exception, "expected: '\"' at beginning of string" ); else FC_THROW_EXCEPTION( parse_error_exception, "expected: '\"' | '\\\'' at beginning of string" ); } if( in.peek() == q ) { in.get(); try { if( in.peek() != q ) return std::string(); } catch( const fc::eof_exception& e ) { return std::string(); } // triple quote processing if( strict ) FC_THROW_EXCEPTION( parse_error_exception, "triple quote unsupported in strict mode" ); else { in.get(); while( true ) { char c = in.peek(); if( c == q ) { in.get(); char c2 = in.peek(); if( c2 == q ) { in.get(); char c3 = in.peek(); if( c3 == q ) { in.get(); return token.str(); } token << q << q; continue; } token << q; continue; } else if( c == '\x04' ) FC_THROW_EXCEPTION( parse_error_exception, "unexpected EOF in string '${token}'", ("token", token.str() ) ); else if( allow_escape && (c == '\\') ) token << parseEscape( in ); else { in.get(); token << c; } } } } while( true ) { char c = in.peek(); if( c == q ) { in.get(); return token.str(); } else if( c == '\x04' ) FC_THROW_EXCEPTION( parse_error_exception, "unexpected EOF in string '${token}'", ("token", token.str() ) ); else if( allow_escape && (c == '\\') ) token << parseEscape( in ); else if( (c == '\r') | (c == '\n') ) FC_THROW_EXCEPTION( parse_error_exception, "unexpected EOL in string '${token}'", ("token", token.str() ) ); else { in.get(); token << c; } } } FC_RETHROW_EXCEPTIONS( warn, "while parsing token '${token}'", ("token", token.str() ) ); }
NFAUNode * WCPattern::parse(const bool inParen, const bool inOr, NFAUNode ** end) { NFAUNode * start, *cur, *next = NULL; CMString t; int grc = groupCount++; bool inv, quo; bool ahead = 0, pos = 0, noncap = 0, indep = 0; unsigned long oldFlags = flags; if (inParen) { if (pattern[curInd] == '?') { ++curInd; --groupCount; if (pattern[curInd] == ':') { noncap = 1; ++curInd; grc = --nonCapGroupCount; } else if (pattern[curInd] == '=') { ++curInd; ahead = 1; pos = 1; } else if (pattern[curInd] == '!') { ++curInd; ahead = 1; pos = 0; } else if (pattern.Mid(curInd, 2) == L"<=") { curInd += 2; return parseBehind(1, end); } else if (pattern.Mid(curInd, 2) == L"<!") { curInd += 2; return parseBehind(0, end); } else if (pattern[curInd] == '>') { ++curInd; indep = 1; } else { bool negate = false, done = false; while (!done) { if (curInd >= pattern.GetLength()) { raiseError(); return NULL; } else if (negate) { switch (pattern[curInd]) { case 'i': flags &= ~WCPattern::CASE_INSENSITIVE; break; case 'd': flags &= ~WCPattern::UNIX_LINE_MODE; break; case 'm': flags &= ~WCPattern::MULTILINE_MATCHING; break; case 's': flags &= ~WCPattern::DOT_MATCHES_ALL; break; case ':': done = true; break; case ')': ++curInd; *end = registerNode(new NFALookBehindUNode(L"", true)); return *end; case '-': default: raiseError(); return NULL; } } else { switch (pattern[curInd]) { case 'i': flags |= WCPattern::CASE_INSENSITIVE; break; case 'd': flags |= WCPattern::UNIX_LINE_MODE; break; case 'm': flags |= WCPattern::MULTILINE_MATCHING; break; case 's': flags |= WCPattern::DOT_MATCHES_ALL; break; case ':': done = true; break; case '-': negate = true; break; case ')': ++curInd; *end = registerNode(new NFALookBehindUNode(L"", true)); return *end; default: raiseError(); return NULL; } } ++curInd; } noncap = 1; grc = --nonCapGroupCount; } if (noncap) cur = start = registerNode(new NFAGroupHeadUNode(grc)); else cur = start = registerNode(new NFASubStartUNode); } else cur = start = registerNode(new NFAGroupHeadUNode(grc)); } else cur = start = registerNode(new NFASubStartUNode); while (curInd < pattern.GetLength()) { wchar_t ch = pattern[curInd++]; next = NULL; if (error) return NULL; switch (ch) { case '^': if ((flags & WCPattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAStartOfLineUNode); else next = registerNode(new NFAStartOfInputUNode); break; case '$': if ((flags & WCPattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAEndOfLineUNode); else next = registerNode(new NFAEndOfInputUNode(0)); break; case '|': --groupCount; cur->next = registerNode(new NFAAcceptUNode); cur = start = registerNode(new NFAOrUNode(start, parse(inParen, 1))); break; case '\\': if (curInd < pattern.GetLength()) { bool eoi = 0; switch (pattern[curInd]) { case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': next = parseBackref(); break; case 'A': ++curInd; next = registerNode(new NFAStartOfInputUNode); break; case 'B': ++curInd; next = registerNode(new NFAWordBoundaryUNode(0)); break; case 'b': ++curInd; next = registerNode(new NFAWordBoundaryUNode(1)); break; case 'G': ++curInd; next = registerNode(new NFAEndOfMatchUNode); break; case 'Z': eoi = 1; case 'z': ++curInd; next = registerNode(new NFAEndOfInputUNode(eoi)); break; default: t = parseEscape(inv, quo); //printf("inv quo classes { %c %c %s }\n", inv ? 't' : 'f', quo ? 't' : 'f', t.c_str()); if (!quo) { if (t.GetLength() > 1 || inv) { if ((flags & WCPattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACIClassUNode(t, inv)); else next = registerNode(new NFAClassUNode(t, inv)); } else next = registerNode(new NFACharUNode(t[0])); } else next = parseQuote(); } } else raiseError(); break; case '[': if ((flags & WCPattern::CASE_INSENSITIVE) == 0) { NFAClassUNode * clazz = new NFAClassUNode(); CMString s = parseClass(); for (int i = 0; i < (int)s.GetLength(); ++i) clazz->vals[s[i]] = 1; next = registerNode(clazz); } else { NFACIClassUNode * clazz = new NFACIClassUNode(); CMString s = parseClass(); for (int i = 0; i < s.GetLength(); ++i) clazz->vals[to_lower(s[i])] = 1; next = registerNode(clazz); } break; case '.': { bool useN = 1, useR = 1; NFAClassUNode * clazz = new NFAClassUNode(1); if ((flags & WCPattern::UNIX_LINE_MODE) != 0) useR = 0; if ((flags & WCPattern::DOT_MATCHES_ALL) != 0) useN = useR = 0; if (useN) clazz->vals['\n'] = 1; if (useR) clazz->vals['\r'] = 1; next = registerNode(clazz); } break; case '(': { NFAUNode *end, *t1, *t2; t1 = parse(1, 0, &end); if (!t1) raiseError(); else if (t1->isGroupHeadNode() && (t2 = quantifyGroup(t1, end, grc)) != NULL) { cur->next = t2; cur = t2->next; } else { cur->next = t1; cur = end; } } break; case ')': if (!inParen) raiseError(); else if (inOr) { --curInd; cur = cur->next = registerNode(new NFAAcceptUNode); flags = oldFlags; return start; } else { if (ahead) { cur = cur->next = registerNode(new NFAAcceptUNode); flags = oldFlags; return *end = registerNode(new NFALookAheadUNode(start, pos)); } else if (indep) { cur = cur->next = registerNode(new NFAAcceptUNode); flags = oldFlags; return *end = registerNode(new NFAPossessiveQuantifierUNode(this, start, 1, 1)); } else { // capping or noncapping, it doesnt matter *end = cur = cur->next = registerNode(new NFAGroupTailUNode(grc)); next = quantifyGroup(start, *end, grc); if (next) { start = next; *end = next->next; } flags = oldFlags; return start; } } break; case '{': // registered pattern cur->next = parseRegisteredWCPattern(&next); if (cur->next) cur = next; break; case '*': case '+': case '?': // case '}': // case ']': raiseError(); break; default: if ((flags & WCPattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACICharUNode(ch)); else next = registerNode(new NFACharUNode(ch)); break; } if (next) cur = cur->next = quantify(next); } if (inParen) raiseError(); else { if (inOr) cur = cur->next = registerNode(new NFAAcceptUNode); if (end) *end = cur; } flags = oldFlags; if (error) return NULL; return start; }
CMString WCPattern::parseClass() { CMString t, ret; wchar_t ch, c1, c2; bool inv = 0, neg = 0, quo = 0; if (curInd < pattern.GetLength() && pattern[curInd] == '^') { ++curInd; neg = 1; } while (curInd < pattern.GetLength() && pattern[curInd] != ']') { ch = pattern[curInd++]; if (ch == '[') { t = parseClass(); ret = classUnion(ret, t); } else if (ch == '&' && curInd < pattern.GetLength() && pattern[curInd] == '&') { if (pattern[++curInd] != '[') { raiseError(); curInd = pattern.GetLength(); } else { ++curInd; t = parseClass(); ret = classIntersect(ret, t); } } else if (ch == '\\') { t = parseEscape(inv, quo); if (quo) { raiseError(); curInd = pattern.GetLength(); } else if (inv || t.GetLength() > 1) { // cant be part of a range (a-z) if (inv) t = classNegate(t); ret = classUnion(ret, t); } else if (curInd < pattern.GetLength() && pattern[curInd] == '-') { // part of a range (a-z) c1 = t[0]; ++curInd; if (curInd >= pattern.GetLength()) raiseError(); else { c2 = pattern[curInd++]; if (c2 == '\\') { t = parseEscape(inv, quo); if (quo) { raiseError(); curInd = pattern.GetLength(); } else if (inv || t.GetLength() > 1) raiseError(); else ret = classUnion(ret, classCreateRange(c1, c2)); } else if (c2 == '[' || c2 == ']' || c2 == '-' || c2 == '&') { raiseError(); curInd = pattern.GetLength(); } else ret = classUnion(ret, classCreateRange(c1, c2)); } } else ret = classUnion(ret, t); } else if (curInd < pattern.GetLength() && pattern[curInd] == '-') { c1 = ch; ++curInd; if (curInd >= pattern.GetLength()) raiseError(); else { c2 = pattern[curInd++]; if (c2 == '\\') { t = parseEscape(inv, quo); if (quo) { raiseError(); curInd = pattern.GetLength(); } else if (inv || t.GetLength() > 1) raiseError(); else ret = classUnion(ret, classCreateRange(c1, c2)); } else if (c2 == '[' || c2 == ']' || c2 == '-' || c2 == '&') { raiseError(); curInd = pattern.GetLength(); } else ret = classUnion(ret, classCreateRange(c1, c2)); } } else ret.AppendChar(ch); } if (curInd >= pattern.GetLength() || pattern[curInd] != ']') { raiseError(); ret = L""; } else { ++curInd; if (neg) ret = classNegate(ret); } return ret; }