Esempio n. 1
0
/**
 * Runs parseEscape over `s` once for every escape sequence this
 * preprocessor knows about: "\n", "\s", "\t" and "\v" (written out as a
 * backslash followed by the letter), each paired with the character it
 * stands for (newline, space, tab, vertical tab).
 *
 * @param s  string handed to parseEscape for each pair; passed by
 *           non-const reference, so it is presumably edited in place.
 */
void Preprocessor::parseEscapes(std::string& s)
{
    // { textual escape, replacement } -- processed top to bottom.
    static const char* const escapeTable[][2] = {
        { "\\n", "\n" },
        { "\\s", " "  },
        { "\\t", "\t" },
        { "\\v", "\v" }
    };

    const unsigned entryCount = sizeof(escapeTable) / sizeof(escapeTable[0]);
    for (unsigned idx = 0; idx < entryCount; ++idx)
        parseEscape(s, escapeTable[idx][0], escapeTable[idx][1]);
}
Esempio n. 2
0
   /**
    * Reads one bare (unquoted) token from `in`.
    *
    * Characters are accumulated while they are ASCII letters, digits or one
    * of `_ - . + /`.  A backslash delegates to parseEscape( in ) and appends
    * its result.  Tab, space, ',', ':', NUL, newline and 0x04 terminate the
    * token and are consumed; any other character terminates the token and
    * is left in the stream.
    *
    * @param in  stream-like object providing peek() and get().
    * @return the accumulated token (possibly empty).  fc::eof_exception and
    *         std::ios_base::failure are swallowed and yield the text read so
    *         far; other exceptions go through FC_RETHROW_EXCEPTIONS.
    */
   std::string tokenFromStream( T& in )
   {
      fc::stringstream token;
      try
      {
         // Probe the stream once before looping; an end-of-stream failure
         // raised here is handled by the catch clauses below.
         char c = in.peek();

         for( ;; )
         {
            c = in.peek();

            // Escape sequence: parseEscape does its own reading from the stream.
            if( c == '\\' )
            {
               token << parseEscape( in );
               continue;
            }

            // Delimiters finish the token and are removed from the stream.
            const bool isDelimiter =
               c == '\t' || c == ' '  || c == ',' || c == ':' ||
               c == '\0' || c == '\n' || c == '\x04';
            if( isDelimiter )
            {
               in.get();
               return token.str();
            }

            // Same set as an explicit a-z / A-Z / 0-9 list (ASCII-contiguous ranges).
            const bool isTokenChar =
               ( c >= 'a' && c <= 'z' ) ||
               ( c >= 'A' && c <= 'Z' ) ||
               ( c >= '0' && c <= '9' ) ||
               c == '_' || c == '-' || c == '.' || c == '+' || c == '/';
            if( !isTokenChar )
               return token.str();   // unknown character stays unread

            token << c;
            in.get();
         }
      }
      catch( const fc::eof_exception& )
      {
         return token.str();
      }
      catch( const std::ios_base::failure& )
      {
         return token.str();
      }

      FC_RETHROW_EXCEPTIONS( warn, "while parsing token '${token}'",
                                          ("token", token.str() ) );
   }
Esempio n. 3
0
   /**
    * Reads one bare (unquoted) string token from `in`.
    *
    * Alphanumeric characters and `_ - . : /` are appended to the token.  A
    * backslash delegates to parseEscape( in ) and appends its result.  Tab,
    * space, NUL and newline terminate the token and are consumed; any other
    * character terminates the token and is left in the stream.
    *
    * @param in  stream-like object providing peek() and get().
    * @return the accumulated token (possibly empty).  fc::eof_exception and
    *         std::ios_base::failure are swallowed and yield the text read so
    *         far; other exceptions go through FC_RETHROW_EXCEPTIONS.
    */
   std::string stringFromToken( T& in )
   {
      fc::stringstream token;
      try
      {
         // Probe the stream once before looping; an end-of-stream failure
         // raised here is handled by the catch clauses below.
         char c = in.peek();

         while( true )
         {
            switch( c = in.peek() )
            {
               case '\\':
                  token << parseEscape( in );
                  break;
               case '\t':
               case ' ':
               case '\0':
               case '\n':
                  in.get();
                  return token.str();
               default:
                  // isalnum() has undefined behaviour for negative arguments
                  // other than EOF, and plain `char` may be signed: bytes
                  // >= 0x80 must be converted to unsigned char first.
                  if( isalnum( static_cast<unsigned char>( c ) ) ||
                      c == '_' || c == '-' || c == '.' || c == ':' || c == '/' )
                  {
                     token << c;
                     in.get();
                  }
                  else return token.str();   // unknown character stays unread
            }
         }
      }
      catch( const fc::eof_exception& )
      {
         return token.str();
      }
      catch( const std::ios_base::failure& )
      {
         return token.str();
      }

      FC_RETHROW_EXCEPTIONS( warn, "while parsing token '${token}'",
                                          ("token", token.str() ) );
   }
Esempio n. 4
0
   /**
    * Reads a double-quoted string from `in` and returns its contents without
    * the surrounding quotes.
    *
    * The next character must be '"', otherwise parse_error_exception is
    * thrown.  Inside the string a backslash delegates to parseEscape( in );
    * the closing '"' is consumed and ends the string; character 0x04 is
    * treated as end of input and raises parse_error_exception.
    *
    * @param in  stream-like object providing peek() and get().
    * @return the characters between the quotes.
    * Any exception leaving the body passes through FC_RETHROW_EXCEPTIONS,
    * which is given the partial token as context.
    */
   std::string stringFromStream( T& in )
   {
      fc::stringstream token;
      try
      {
         char c = in.peek();

         // The opening quote is mandatory.
         if( c != '"' )
         {
            FC_THROW_EXCEPTION( parse_error_exception,
                                            "Expected '\"' but read '${char}'",
                                            ("char", string(&c, (&c) + 1) ) );
         }
         in.get();

         for( ;; )
         {
            c = in.peek();

            if( c == '"' )
            {
               // Closing quote: consume it and hand back what was collected.
               in.get();
               return token.str();
            }

            if( c == '\\' )
            {
               token << parseEscape( in );
               continue;
            }

            if( c == 0x04 )
            {
               FC_THROW_EXCEPTION( parse_error_exception, "EOF before closing '\"' in string '${token}'",
                                                ("token", token.str() ) );
            }

            // Ordinary character: copy it and advance.
            token << c;
            in.get();
         }
      } FC_RETHROW_EXCEPTIONS( warn, "while parsing token '${token}'",
                                          ("token", token.str() ) );
   }
Esempio n. 5
0
   /**
    * Reads a quoted string from `in` and returns its contents without the
    * surrounding quotes.
    *
    * The first character read is the quote character `q`.  It must be '"';
    * a single quote is also accepted when `strict` is false.  Three forms
    * are handled:
    *   - an empty string (two quote characters in a row);
    *   - a triple-quoted string (three quote characters, rejected when
    *     `strict` is true), which runs until three consecutive quote
    *     characters and has no end-of-line check;
    *   - an ordinary string, which runs until the next quote character and
    *     rejects '\r' and '\n'.
    * In both non-empty forms a backslash is passed to parseEscape( in ) when
    * `allow_escape` is true, and character 0x04 is reported as an unexpected
    * EOF.
    *
    * NOTE(review): `strict` and `allow_escape` are not declared in this
    * block -- presumably template parameters or members; confirm.
    *
    * @param in  stream-like object providing peek() and get().
    * @return the string contents.
    * Throws parse_error_exception on the malformed inputs described above;
    * any exception leaving the body passes through FC_RETHROW_EXCEPTIONS,
    * which is given the partial token as context.
    */
   std::string quoteStringFromStream( T& in )
   {
       fc::stringstream token;
       try
       {
           // Consume the opening quote and remember which kind it was.
           char q = in.get();
           switch( q )
           {
               case '\'':
                   if( strict )
                       FC_THROW_EXCEPTION( parse_error_exception, "expected: '\"' at beginning of string, got '\''" );
                   // falls through
               case '"':
                   break;
               default:
                   // Neither quote kind: the message lists what the current mode accepts.
                   if( strict )
                       FC_THROW_EXCEPTION( parse_error_exception, "expected: '\"' at beginning of string" );
                   else
                       FC_THROW_EXCEPTION( parse_error_exception, "expected: '\"' | '\\\'' at beginning of string" );
           }
           // A second quote right away means either an empty string or the
           // start of a triple-quoted string.
           if( in.peek() == q )
           {
               in.get();
               try
               {
                  // No third quote: the string was empty.
                  if( in.peek() != q )
                     return std::string();
               }
               catch( const fc::eof_exception& e )
               {
                  // Input ended right after the two quotes: also an empty string.
                  return std::string();
               }

               // triple quote processing
               if( strict )
                   FC_THROW_EXCEPTION( parse_error_exception, "triple quote unsupported in strict mode" );
               else
               {
                   // Consume the third opening quote.
                   in.get();
                   
                   while( true )
                   {
                       char c = in.peek();
                       if( c == q )
                       {
                           // Count consecutive quotes: three close the string,
                           // one or two are literal content.
                           in.get();
                           char c2 = in.peek();
                           if( c2 == q )
                           {
                               in.get();
                               char c3 = in.peek();
                               if( c3 == q )
                               {
                                   in.get();
                                   return token.str();
                               }
                               // Two quotes followed by something else; that
                               // character is left for the next iteration.
                               token << q << q;
                               continue;
                           }
                           // A lone quote inside the string.
                           token << q;
                           continue;
                       }
                       else if( c == '\x04' )
                           FC_THROW_EXCEPTION( parse_error_exception, "unexpected EOF in string '${token}'",
                                      ("token", token.str() ) );
                       else if( allow_escape && (c == '\\') )
                           token << parseEscape( in );
                       else
                       {
                           in.get();
                           token << c;
                       }
                   }
               }
           }
           
           // Ordinary single-line string: read up to the matching quote.
           while( true )
           {
               char c = in.peek();

               if( c == q )
               {
                   in.get();
                   return token.str();
               }
               else if( c == '\x04' )
                   FC_THROW_EXCEPTION( parse_error_exception, "unexpected EOF in string '${token}'",
                              ("token", token.str() ) );
               else if( allow_escape && (c == '\\') )
                   token << parseEscape( in );
               // Bitwise | on two bool operands; both sides are always evaluated.
               else if( (c == '\r') | (c == '\n') )
                   FC_THROW_EXCEPTION( parse_error_exception, "unexpected EOL in string '${token}'",
                              ("token", token.str() ) );
               else
               {
                   in.get();
                   token << c;
               }
           }
           
       } FC_RETHROW_EXCEPTIONS( warn, "while parsing token '${token}'",
                                          ("token", token.str() ) );
   }
Esempio n. 6
0
// Parses the pattern text starting at curInd into a chain of NFA nodes and
// returns the first node of that chain. Calls itself recursively for
// parenthesised groups and for the right-hand side of '|'.
//
// Parameters:
//   inParen - true when the caller has just consumed a '('; enables the
//             "(?...)" prefix handling and makes ')' the normal terminator.
//   inOr    - true when parsing the right-hand side of an alternation.
//   end     - out-parameter receiving the last node of the parsed chain.
//             It is null-checked only on the end-of-pattern path; the ')' and
//             flags-only "(?...)" paths dereference it unconditionally.
//
// Returns the start node, or NULL when an error was raised. Every node is
// created through registerNode(). `flags` is restored to its entry value on
// every exit except the error returns and the flags-only "(?flags)" return.
NFAUNode * WCPattern::parse(const bool inParen, const bool inOr, NFAUNode ** end)
{
	NFAUNode * start, *cur, *next = NULL;
	CMString t;
	// Provisionally claim the next capture-group number; undone below for
	// "(?" constructs and compensated for in the '|' branch.
	int grc = groupCount++;
	bool inv, quo;
	bool ahead = 0, pos = 0, noncap = 0, indep = 0;
	unsigned long oldFlags = flags;

	if (inParen) {
		if (pattern[curInd] == '?') {
			++curInd;
			// "(?" groups do not use a capture-group number.
			--groupCount;
			// (?:  non-capturing, (?=  / (?!  look-ahead, (?<= / (?<!  handed to
			// parseBehind, (?>  independent group; anything else is a flag list.
			if (pattern[curInd] == ':')   { noncap = 1; ++curInd;     grc = --nonCapGroupCount; }
			else if (pattern[curInd] == '=')   { ++curInd;     ahead = 1;  pos = 1; }
			else if (pattern[curInd] == '!')   { ++curInd;     ahead = 1;  pos = 0; }
			else if (pattern.Mid(curInd, 2) == L"<=")  { curInd += 2;  return parseBehind(1, end); }
			else if (pattern.Mid(curInd, 2) == L"<!")  { curInd += 2;  return parseBehind(0, end); }
			else if (pattern[curInd] == '>')   { ++curInd;     indep = 1; }
			else {
				// Inline flags: letters before '-' switch a flag on, letters
				// after it switch the flag off.
				bool negate = false, done = false;
				while (!done) {
					if (curInd >= pattern.GetLength()) {
						raiseError();
						return NULL;
					}
					else if (negate) {
						switch (pattern[curInd]) {
						case 'i': flags &= ~WCPattern::CASE_INSENSITIVE;   break;
						case 'd': flags &= ~WCPattern::UNIX_LINE_MODE;     break;
						case 'm': flags &= ~WCPattern::MULTILINE_MATCHING; break;
						case 's': flags &= ~WCPattern::DOT_MATCHES_ALL;    break;
						case ':': done = true;                             break;
						case ')':
							// Flags-only group "(?...)": returns a placeholder node
							// without restoring `flags`, so the change stays active
							// for the caller.
							++curInd;
							*end = registerNode(new NFALookBehindUNode(L"", true));
							return *end;
						case '-':
						default:
							// A second '-' or an unknown letter is an error.
							raiseError();
							return NULL;
						}
					}
					else {
						switch (pattern[curInd]) {
						case 'i': flags |= WCPattern::CASE_INSENSITIVE;    break;
						case 'd': flags |= WCPattern::UNIX_LINE_MODE;      break;
						case 'm': flags |= WCPattern::MULTILINE_MATCHING;  break;
						case 's': flags |= WCPattern::DOT_MATCHES_ALL;     break;
						case ':': done = true;                             break;
						case '-': negate = true;                           break;
						case ')':
							// Flags-only group, same handling as in the negate branch.
							++curInd;
							*end = registerNode(new NFALookBehindUNode(L"", true));
							return *end;
						default:
							raiseError();
							return NULL;
						}
					}
					// Step past the character just handled (including the ':').
					++curInd;
				}
				// "(?flags:...)" behaves as a non-capturing group.
				noncap = 1;
				grc = --nonCapGroupCount;
			}

			// Non-capturing groups get a group-head node; look-ahead and
			// independent groups start with a plain sub-start node.
			if (noncap) cur = start = registerNode(new NFAGroupHeadUNode(grc));
			else        cur = start = registerNode(new NFASubStartUNode);
		}
		else cur = start = registerNode(new NFAGroupHeadUNode(grc));
	}
	else cur = start = registerNode(new NFASubStartUNode);

	// Main loop: one pattern character per iteration. Branches that produce a
	// single node leave it in `next`; it is quantified and linked after the switch.
	while (curInd < pattern.GetLength()) {
		wchar_t ch = pattern[curInd++];

		next = NULL;
		if (error) return NULL;
		switch (ch) {
		case '^':
			if ((flags & WCPattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAStartOfLineUNode);
			else                                            next = registerNode(new NFAStartOfInputUNode);
			break;
		case '$':
			if ((flags & WCPattern::MULTILINE_MATCHING) != 0) next = registerNode(new NFAEndOfLineUNode);
			else                                            next = registerNode(new NFAEndOfInputUNode(0));
			break;
		case '|':
			// Cancels the groupCount++ performed at the top of the recursive call.
			--groupCount;
			// Terminate the left alternative, then wrap both sides in an OR node
			// that becomes the new start. The recursive call passes no `end`
			// (presumably defaulted in the declaration -- confirm).
			cur->next = registerNode(new NFAAcceptUNode);
			cur = start = registerNode(new NFAOrUNode(start, parse(inParen, 1)));
			break;
		case '\\':
			if (curInd < pattern.GetLength()) {
				bool eoi = 0;
				switch (pattern[curInd]) {
				// \1..\9: curInd is not advanced here; parseBackref does the reading.
				case '1':
				case '2':
				case '3':
				case '4':
				case '5':
				case '6':
				case '7':
				case '8':
				case '9': next = parseBackref(); break;
				case 'A': ++curInd; next = registerNode(new NFAStartOfInputUNode);     break;
				case 'B': ++curInd; next = registerNode(new NFAWordBoundaryUNode(0));  break;
				case 'b': ++curInd; next = registerNode(new NFAWordBoundaryUNode(1));  break;
				case 'G': ++curInd; next = registerNode(new NFAEndOfMatchUNode);       break;
				// 'Z' deliberately falls through to 'z' with eoi set to 1.
				case 'Z': eoi = 1;
				case 'z': ++curInd; next = registerNode(new NFAEndOfInputUNode(eoi));  break;
				default:
					// Any other escape: parseEscape returns the character(s) it
					// denotes and fills in inv / quo.
					t = parseEscape(inv, quo);
					//printf("inv quo classes { %c %c %s }\n", inv ? 't' : 'f', quo ? 't' : 'f', t.c_str());
					if (!quo) {
						// Several characters or an inverted set need a class node;
						// a single character becomes a char node.
						if (t.GetLength() > 1 || inv) {
							if ((flags & WCPattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACIClassUNode(t, inv));
							else                                            next = registerNode(new NFAClassUNode(t, inv));
						}
						else next = registerNode(new NFACharUNode(t[0]));
					}
					else next = parseQuote();
				}
			}
			else raiseError(); // pattern ends with a lone backslash
			break;
		case '[':
			// Character class: mark every member returned by parseClass in the
			// node's `vals`; members are lower-cased first when case-insensitive.
			if ((flags & WCPattern::CASE_INSENSITIVE) == 0) {
				NFAClassUNode * clazz = new NFAClassUNode();
				CMString s = parseClass();
				for (int i = 0; i < (int)s.GetLength(); ++i) clazz->vals[s[i]] = 1;
				next = registerNode(clazz);
			}
			else {
				NFACIClassUNode * clazz = new NFACIClassUNode();
				CMString s = parseClass();
				for (int i = 0; i < s.GetLength(); ++i) clazz->vals[to_lower(s[i])] = 1;
				next = registerNode(clazz);
			}
			break;
		case '.':
			{
				// Class node holding '\n' and '\r' unless flags remove them:
				// UNIX_LINE_MODE drops '\r', DOT_MATCHES_ALL drops both.
				// The constructor argument 1 presumably marks the class as inverted -- confirm.
				bool useN = 1, useR = 1;
				NFAClassUNode * clazz = new NFAClassUNode(1);
				if ((flags & WCPattern::UNIX_LINE_MODE) != 0) useR = 0;
				if ((flags & WCPattern::DOT_MATCHES_ALL) != 0) useN = useR = 0;
				if (useN) clazz->vals['\n'] = 1;
				if (useR) clazz->vals['\r'] = 1;
				next = registerNode(clazz);
			}
			break;
		case '(':
			{
				// Note: this local `end` shadows the function parameter.
				NFAUNode *end, *t1, *t2;
				t1 = parse(1, 0, &end);
				if (!t1) raiseError();
				else if (t1->isGroupHeadNode() && (t2 = quantifyGroup(t1, end, grc)) != NULL) {
					// The group was quantified: link the quantifier node instead.
					cur->next = t2;
					cur = t2->next;
				}
				else {
					cur->next = t1;
					cur = end;
				}
			}
			break;
		case ')':
			if (!inParen) raiseError();
			else if (inOr) {
				// Leave the ')' for the enclosing call, which owns the group.
				--curInd;
				cur = cur->next = registerNode(new NFAAcceptUNode);
				flags = oldFlags;
				return start;
			}
			else {
				if (ahead) {
					cur = cur->next = registerNode(new NFAAcceptUNode);
					flags = oldFlags;
					return *end = registerNode(new NFALookAheadUNode(start, pos));
				}
				else if (indep) {
					cur = cur->next = registerNode(new NFAAcceptUNode);
					flags = oldFlags;
					return *end = registerNode(new NFAPossessiveQuantifierUNode(this, start, 1, 1));
				}
				else { // capping or noncapping, it doesnt matter
					*end = cur = cur->next = registerNode(new NFAGroupTailUNode(grc));
					next = quantifyGroup(start, *end, grc);
					if (next) {
						start = next;
						*end = next->next;
					}
					flags = oldFlags;
					return start;
				}
			}
			break;
		case '{': // registered pattern
			// NOTE(review): `next` is filled in by the callee and is therefore
			// also seen by the quantify() call after the switch -- confirm intended.
			cur->next = parseRegisteredWCPattern(&next);
			if (cur->next) cur = next;
			break;
		case '*':
		case '+':
		case '?':
			//    case '}':
			//    case ']':
			// A quantifier with nothing in front of it to quantify.
			raiseError();
			break;
		default:
			// Literal character.
			if ((flags & WCPattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACICharUNode(ch));
			else                                          next = registerNode(new NFACharUNode(ch));
			break;
		}
		// Apply any quantifier that follows and append the node to the chain.
		if (next) cur = cur->next = quantify(next);
	}
	// Pattern text exhausted: inside a group this means the ')' is missing.
	if (inParen) raiseError();
	else {
		if (inOr) cur = cur->next = registerNode(new NFAAcceptUNode);
		if (end) *end = cur;
	}

	flags = oldFlags;
	if (error) return NULL;

	return start;
}
Esempio n. 7
0
// Parses the body of a bracketed character class. curInd is expected to be
// positioned just after the opening '['.
//
// Handles a leading '^' (negation), nested classes ("[...]" is unioned in),
// intersections ("&&[...]"), escapes via parseEscape, and ranges ("a-z",
// where either bound may be an escaped single character).
//
// Returns the characters belonging to the class. When the closing ']' is
// missing, raiseError() is called and an empty string is returned. On
// success curInd is left just past the closing ']'.
CMString WCPattern::parseClass()
{
	CMString t, ret;
	wchar_t ch, c1, c2;
	bool inv = 0, neg = 0, quo = 0;

	// A leading '^' negates the whole class (applied at the very end).
	if (curInd < pattern.GetLength() && pattern[curInd] == '^') {
		++curInd;
		neg = 1;
	}

	while (curInd < pattern.GetLength() && pattern[curInd] != ']') {
		ch = pattern[curInd++];
		if (ch == '[') {
			// Nested class: union with what has been collected so far.
			t = parseClass();
			ret = classUnion(ret, t);
		}
		else if (ch == '&' && curInd < pattern.GetLength() && pattern[curInd] == '&') {
			// "&&" must be followed by '['. Check the bound before indexing so
			// a pattern ending in "&&" is reported instead of read past its end.
			++curInd;
			if (curInd >= pattern.GetLength() || pattern[curInd] != '[') {
				raiseError();
				curInd = pattern.GetLength();
			}
			else {
				++curInd;
				t = parseClass();
				ret = classIntersect(ret, t);
			}
		}
		else if (ch == '\\') {
			t = parseEscape(inv, quo);
			if (quo) {
				// Quoting is not allowed inside a class.
				raiseError();
				curInd = pattern.GetLength();
			}
			else if (inv || t.GetLength() > 1) { // cant be part of a range (a-z)
				if (inv) t = classNegate(t);
				ret = classUnion(ret, t);
			}
			else if (curInd < pattern.GetLength() && pattern[curInd] == '-') { // part of a range (a-z)
				c1 = t[0];
				++curInd;
				if (curInd >= pattern.GetLength()) raiseError();
				else {
					c2 = pattern[curInd++];
					if (c2 == '\\') {
						t = parseEscape(inv, quo);
						if (quo) {
							raiseError();
							curInd = pattern.GetLength();
						}
						// The upper bound must resolve to exactly one character.
						else if (inv || t.GetLength() != 1) raiseError();
						else {
							// Use the escaped character as the bound, not the backslash.
							c2 = t[0];
							ret = classUnion(ret, classCreateRange(c1, c2));
						}
					}
					else if (c2 == '[' || c2 == ']' || c2 == '-' || c2 == '&') {
						raiseError();
						curInd = pattern.GetLength();
					}
					else ret = classUnion(ret, classCreateRange(c1, c2));
				}
			}
			else ret = classUnion(ret, t);
		}
		else if (curInd < pattern.GetLength() && pattern[curInd] == '-') {
			// Plain character followed by '-': start of a range.
			c1 = ch;
			++curInd;
			if (curInd >= pattern.GetLength()) raiseError();
			else {
				c2 = pattern[curInd++];
				if (c2 == '\\') {
					t = parseEscape(inv, quo);
					if (quo) {
						raiseError();
						curInd = pattern.GetLength();
					}
					// The upper bound must resolve to exactly one character.
					else if (inv || t.GetLength() != 1) raiseError();
					else {
						// Use the escaped character as the bound, not the backslash.
						c2 = t[0];
						ret = classUnion(ret, classCreateRange(c1, c2));
					}
				}
				else if (c2 == '[' || c2 == ']' || c2 == '-' || c2 == '&') {
					raiseError();
					curInd = pattern.GetLength();
				}
				else ret = classUnion(ret, classCreateRange(c1, c2));
			}
		}
		else ret.AppendChar(ch);
	}

	// The loop stops either at ']' or at the end of the pattern; only the
	// former is a well-formed class.
	if (curInd >= pattern.GetLength() || pattern[curInd] != ']') {
		raiseError();
		ret = L"";
	}
	else {
		++curInd;
		if (neg) ret = classNegate(ret);
	}
	return ret;
}