Exemplo n.º 1
0
    /// Skips a block comment written as (* ... *).
    ///
    /// Stores the current token location in loc_. When the input is not
    /// positioned on "(*" nothing else happens. Otherwise characters are
    /// discarded until the closing "*)" has been consumed. If the stream
    /// reports end of file first, an error token is emitted and errorFlag_
    /// is set.
    void Scanner::handleLineComment()
    {
        loc_ = getTokenLocation();

        const bool atCommentStart = (currentChar_ == '(') && (peekChar() == '*');
        if (!atCommentStart)
        {
            return;
        }

        // Step over the two-character opener "(*"; the second call leaves
        // currentChar_ on the first character of the comment body.
        getNextChar();
        getNextChar();

        for (;;)
        {
            // Stop as soon as the closer "*)" is in sight.
            if (currentChar_ == '*' && peekChar() == ')')
            {
                break;
            }

            // Discard one character of comment text.
            getNextChar();

            // The input ended before the comment was closed.
            if (input_.eof())
            {
                errorToken(getTokenLocation().toString() + "end of file happended in comment, *) is expected!, but find " + currentChar_);
                errorFlag_ = true;
                break;
            }
        }

        // Only consume the closer when it is really there.
        if (!input_.eof())
        {
            getNextChar(); // '*'
            getNextChar(); // ')', leaves currentChar_ on the next input char
        }
    }
Exemplo n.º 2
0
// Lexes a `key=value` line into m_key and m_value.
//
// The key starts with the current character and runs up to (not including)
// the next '='. The value starts with the character after '=' and runs up to
// (not including) the next '\n'. Both delimiters are consumed. `token` is not
// modified. Always returns PT_KEYVALUE.
KAbstractHdrParserPrivate::token_id KAbstractHdrParserPrivate::lexTokenKeyValue(KAbstractLexer::token_type &token)
{
  // Key: current character plus everything before '='.
  m_key = currChar();
  while (peekChar() != '=')
  {
    m_key += nextChar();
  }
  nextChar(); // step onto '='

  // Value: first character after '=' plus everything before the newline.
  m_value = nextChar();
  while (peekChar() != '\n')
  {
    m_value += nextChar();
  }
  nextChar(); // step onto '\n'

  return PT_KEYVALUE;
}
Exemplo n.º 3
0
	/**
	 * Reads a hexadecimal number from the input.
	 *
	 * Leading whitespace is skipped first. If no character is available, or
	 * the first character is not alphanumeric, err is set to
	 * TextFile::ERROR_PARSE and 0 is returned. Otherwise hex digits
	 * (0-9, a-f, A-F) are consumed and accumulated until the first
	 * non-hex character, which is left unread.
	 *
	 * @return The accumulated value, or 0 if err was already set or a
	 *         parse error occurred.
	 */
	long readHex()
	{
		if ( err )
			return 0;

		skipWhitespace();

		// hex must start with alphanumeric character.
		// The <cctype> classifiers are undefined for negative values, so a
		// plain char must be converted to unsigned char first.
		char ch;
		if ( !peekChar(&ch) || !isalnum(static_cast<unsigned char>(ch)) )
		{
			err = TextFile::ERROR_PARSE;
			return 0;
		}

		long x = 0;
		while ( peekChar(&ch) )
		{
			int digit;
			if ( ch >= '0' && ch <= '9' )
				digit = ch - '0';
			else if ( ch >= 'a' && ch <= 'f' )
				digit = ch - 'a' + 0xA;
			else if ( ch >= 'A' && ch <= 'F' )
				digit = ch - 'A' + 0xA;
			else
				return x;	// first non-hex character ends the number (not consumed)

			x <<= 4;
			x += digit;
			readChar( &ch );
		}
		return x;
	}
Exemplo n.º 4
0
    /// Scans an operator/symbol token of one or two characters.
    ///
    /// The current character and the lookahead character are both placed in
    /// buffer_. If the dictionary knows that two-character spelling, the
    /// second character is consumed; otherwise reduceBuffer() is called
    /// (presumably dropping the lookahead character again - confirm against
    /// its definition). The resulting buffer_ is looked up in the dictionary
    /// and a token is made from the returned metadata.
    void Scanner::handleOperationState()
    {
        loc_ = getTokenLocation();

        // Try the longest match first: current symbol char + next symbol char.
        addToBuffer(currentChar_);
        addToBuffer(peekChar());

        if (dictionary_.haveToken(buffer_))
        {
            // Two-character operator: consume its second character.
            getNextChar();
        }
        else
        {
            reduceBuffer();
        }

        auto tokenMeta = dictionary_.lookup(buffer_);
        // token type, token value, name, symbol precedence
        makeToken(std::get<0>(tokenMeta), std::get<1>(tokenMeta), loc_, buffer_, std::get<2>(tokenMeta));
        // update currentChar_
        getNextChar();
    }
Exemplo n.º 5
0
// Reads an optionally signed run of decimal digits starting at the current
// character.
//
// sign  - receives -1 when the current character is '-', otherwise 1.
// power - optional; when non-null receives 10^k where k is the number of
//         loop iterations plus one (i.e. 10 for a single leading digit).
//         lexTokenInteger calls this function with `sign` only, so a null
//         `power` is tolerated instead of being dereferenced.
//
// Returns the magnitude of the digits read (the sign is NOT applied).
int KAbstractObjParserPrivate::lexReadInteger(int *sign, int *power)
{
  *sign = 1;
  int pow = 10;
  int integer = 0;

  // Check for negation
  if (currChar() == '-')
    *sign = -1;
  else if (currChar() == '+')
    ; // Do nothing, sign is already 1
  else
    integer = Karma::ctoi(currChar());

  // Read the integer value
  while (Karma::isNumeric(peekChar()))
  {
    pow *= 10;
    integer *= 10;
    integer += Karma::ctoi(nextChar());
  }

  // Only report the scale when the caller asked for it.
  if (power)
    (*power) = pow;
  return integer;
}
Exemplo n.º 6
0
// Parse one simple selector, e.g.: `type#id.class1.class2.class3`
//
// Components are consumed one after another until a character is reached
// that cannot continue the selector (or the input ends):
//   '#'  -> id (a later '#' overwrites an earlier one)
//   '.'  -> appended to the class list
//   '*'  -> universal selector, consumed and otherwise ignored
//   identifier character -> tag name
// Returns a SimpleSelector built from whatever was collected; all three
// parts may be empty.
SimpleSelector CSSParser::parseSimpleSelector(){
    QString tagName;
    QString id;
    QVector<QString> classes;
    // Loop so that compound selectors such as `div#main.note` end up in ONE
    // SimpleSelector instead of stopping after the first component.
    // NOTE(review): relies on parseIdentifier() consuming at least one
    // character whenever isVaidIdentifier(c) is true - confirm.
    while(!eof()){
        const QChar c = peekChar();
        if(c == '#'){
            consumeChar('#');
            id = parseIdentifier();
            qDebug() << "#";
        } else if(c == '.') {
            consumeChar('.');
            classes.append(parseIdentifier());
            qDebug() << ".";
        } else if(c == '*'){
            consumeChar('*');//TODO
            qDebug() << "*";
        } else if(isVaidIdentifier(c)){
            //h1 div.note
            tagName = parseIdentifier();
        } else {
            // Anything else (whitespace, ',', '{', ...) ends this selector.
            break;
        }
    }
    SimpleSelector ret{tagName, id, classes};
#ifdef CSS_DEBUG
    qDebug() << Q_FUNC_INFO;
#endif /* CSS_DEBUG */
    return ret;

}
/*!
  Get one UCS-2 character from the stream.
 \param ucs Receives the character that was read
 \return false when getRawChar() could not deliver a character, true otherwise

 A CR immediately followed by LF is collapsed: the LF is read as well and
 is what ends up in \a ucs, so the pair is reported as a single character.
 */
bool ImportStream::getChar(UT_UCSChar &ucs)
{
	const bool gotChar = getRawChar(ucs);
	if (gotChar && ucs == UCS_CR && peekChar() == UCS_LF)
	{
		// Swallow the LF of a CRLF pair; its result is intentionally ignored.
		getRawChar(ucs);
	}
	return gotChar;
}
Exemplo n.º 8
0
	/**
	 * Consumes the next character of the input.
	 *
	 * The character is obtained through peekChar() and m_peeked is then
	 * cleared (presumably so the next peek fetches a fresh character -
	 * confirm against peekChar()). The line counter is advanced when a
	 * newline was read.
	 *
	 * @param ch Receives the character.
	 * @return false if err is set or peekChar() reported no character.
	 */
	bool readChar( char* ch )
	{
		if ( err )
			return false;

		const bool available = peekChar( ch );
		m_peeked = false;

		if ( !available )
			return false;

		if ( *ch == '\n' )
			++line;
		return true;
	}
Exemplo n.º 9
0
// Tries to consume the character `c` from the input.
// Blanks are skipped first when _skipBlank is set. On a successful
// peekChar(c) the read position _rwHeader is advanced and true is returned;
// otherwise nothing is consumed and false is returned.
inline bool	ConsumerParser::readChar(char c)
{
  if (_skipBlank)
    skipBlanks();

  // peekChar() is the one that calls addData()
  if (!peekChar(c))
    return (false);

  _rwHeader++;
  return (true);
}
Exemplo n.º 10
0
    // Consumes the next character only if it equals `c`.
    // Returns true when the character matched and was consumed via the
    // two-argument getChar(); returns false, consuming nothing, when no more
    // input is available or the next character differs.
    bool getChar(char c, CurrPtr& curr, EndPtr end)
    {
        if (!more(curr, end) || peekChar(curr, end) != c)
        {
            return false;
        }

        getChar(curr, end);
        return true;
    }
Exemplo n.º 11
0
// ipv6HexDigit4 *(':' ipv6HexDigit4) ['::' [ipv6HexSeq]]
// where the first component, ipv6HexDigit4 is already parsed
//
// When firstComplete is false, the current length of stringValue_ is stored
// in ipv6HexDigits_ and the whole hex part is parsed via ipv6HexPart().
// In both cases an optional dotted run of decimal digits (embedded IPv4
// remainder) is appended afterwards. The collected text is finally handed to
// ipValue_.set(); token_ becomes FlowToken::IP on success and
// FlowToken::Unknown otherwise.
FlowToken FlowLexer::continueParseIPv6(bool firstComplete)
{
	bool rv = true;

	if (!firstComplete) {
		ipv6HexDigits_ = stringValue_.size();
		rv = ipv6HexPart();
	} else {
		// further ':'-separated groups, stopping in front of a "::"
		for (;;) {
			if (currentChar() != ':' || peekChar() == ':')
				break;

			stringValue_ += ':';
			nextChar();

			if (!ipv6HexDigit4())
				return false;
		}

		// optional "::" with an optional hex sequence behind it
		if (currentChar() == ':' && peekChar() == ':') {
			stringValue_ += "::";
			nextChar();
			nextChar();

			if (isHexChar())
				rv = ipv6HexSeq();
			else
				rv = true;
		}
	}

	// parse embedded IPv4 remainder
	while (currentChar_ == '.' && std::isdigit(peekChar())) {
		stringValue_ += '.';
		nextChar();

		for (; std::isdigit(currentChar_); nextChar())
			stringValue_ += static_cast<char>(currentChar_);
	}

	const bool accepted = rv && ipValue_.set(stringValue_.c_str(), IPAddress::V6);
	if (accepted)
		return token_ = FlowToken::IP;

	return token_ = FlowToken::Unknown;
}
Exemplo n.º 12
0
// IPv6_HexPart ::= IPv6_HexSeq                        # (1)
//                | IPv6_HexSeq "::" [IPv6_HexSeq]     # (2)
//                            | "::" [IPv6_HexSeq]     # (3)
//
// Returns true when one of the three forms was parsed and the character
// following it is neither alphanumeric nor ':'.
bool FlowLexer::ipv6HexPart()
{
	bool rv;

	if (currentChar() == ':' && peekChar() == ':') {
		// (3): leading "::" replaces whatever stringValue_ held
		stringValue_ = "::";
		nextChar(); // skip ':'
		nextChar(); // skip ':'

		rv = true;
		if (isHexChar())
			rv = ipv6HexSeq();
	} else {
		// (1): plain hex sequence ...
		rv = ipv6HexSeq();

		// (2): ... optionally continued by "::" [IPv6_HexSeq]
		if (rv && currentChar() == ':' && peekChar() == ':') {
			stringValue_ += "::";
			nextChar(); // skip ':'
			nextChar(); // skip ':'

			rv = true;
			if (isHexChar())
				rv = ipv6HexSeq();
		}
	}

	// Trailing address-like characters mean the text is not a valid hex part.
	const bool trailingGarbage = std::isalnum(currentChar_) || currentChar_ == ':';
	return trailingGarbage ? false : rv;
}
Exemplo n.º 13
0
	/**
	 * Consumes characters for as long as the next one is whitespace.
	 *
	 * @return false if err is set; otherwise !eof() after skipping.
	 */
	bool skipWhitespace()
	{
		if ( err )
			return false;

		char ch;
		while ( peekChar(&ch) )
		{
			// isspace() is undefined for negative values, so a plain char
			// must be converted to unsigned char first.
			if ( !isspace(static_cast<unsigned char>(ch)) )
				break;
			readChar( &ch );
		}
		return !eof();
	}
Exemplo n.º 14
0
// 1*4HEXDIGIT *(':' 1*4HEXDIGIT)
//
// Parses one hex group and then any number of further groups that are
// introduced by a single ':' (a "::" is left untouched). Each accepted ':'
// is appended to stringValue_. Returns false as soon as a group fails to
// parse, true otherwise.
bool FlowLexer::ipv6HexSeq()
{
	bool ok = ipv6HexDigit4();

	while (ok && currentChar() == ':' && peekChar() != ':') {
		stringValue_ += ':';
		nextChar();
		ok = ipv6HexDigit4();
	}

	return ok;
}
Exemplo n.º 15
0
// Lexes a number that starts out as an integer.
//
// The digits are read with lexReadInteger(). If a '.' follows, lexing is
// handed over to lexTokenFloat() together with the sign and integer part
// read so far. Otherwise the signed value is stored in
// token.m_attribute.asInteger and PT_INTEGER is returned.
KAbstractObjParserPrivate::token_id KAbstractObjParserPrivate::lexTokenInteger(token_type &token)
{
  int sign;
  const int magnitude = lexReadInteger(&sign);

  if (peekChar() != '.')
  {
    // Plain integer: apply the sign and publish it on the token.
    token.m_attribute.asInteger = sign * magnitude;
    return PT_INTEGER;
  }

  // Eat the decimal
  nextChar();
  nextChar();
  return lexTokenFloat(token, sign, magnitude);
}
Exemplo n.º 16
0
// Lexes an identifier into token.m_lexicon.
//
// The lexeme starts with the current character and is extended for as long
// as the lookahead character satisfies Karma::isAlpha(). The result of
// symResolve(token, PT_STRING) is returned.
KAbstractObjParserPrivate::token_id KAbstractObjParserPrivate::lexTokenIdentifier(token_type &token)
{
  token.m_lexicon = currChar();

  while (Karma::isAlpha(peekChar()))
  {
    token.m_lexicon += nextChar();
  }

  // Read and resolve symbol
  return symResolve(token, PT_STRING);
}
Exemplo n.º 17
0
//auto, #000000, 78px
//
// Parses one declaration value, choosing the parser from the next character:
//   digit -> parseLength()
//   '#'   -> parseColor()
//   other -> a KeywordValue built from parseIdentifier()
QSharedPointer<Value> CSSParser::parseValue(){
#ifdef CSS_DEBUG
    qDebug() << Q_FUNC_INFO;
#endif /* CSS_DEBUG */
    const QChar c = peekChar();
    if (c.isDigit()) {
        return parseLength();
    }
    if (c == '#') {
        return parseColor();
    }
    // Every case returns; the former trailing "dead end" log statement could
    // never execute and has been removed.
    return QSharedPointer<KeywordValue>(new KeywordValue(parseIdentifier()));
}
Exemplo n.º 18
0
//{ margin: auto; color: #cc0000; }
//
// Parses a declaration block: consumes '{', then collects declarations via
// parseDeclaration() until the closing '}' has been consumed. If the input
// ends before a '}' is found, the declarations gathered so far are returned.
QVector<Declaration> CSSParser::parseDeclarations(){
    consumeChar('{');
    QVector<Declaration> declarations;

    while (true) {
        consumeWhitespaceOrNewline();
        // An unterminated block must not keep the loop running once there is
        // nothing left to read.
        if (eof()) {
            break;
        }
        if (peekChar() == '}') {
            consumeChar();
            break;
        }
        declarations.append(parseDeclaration());
    }
    return declarations;
}
Exemplo n.º 19
0
// Lexes the fractional part of a floating point number.
//
// sign / integer are the already-lexed sign and integer part. The fraction
// digits are read with lexReadInteger(), which also reports the power of ten
// used to scale them below 1. If an 'e'/'E' follows, lexing continues in
// lexTokenFloatExponent(); otherwise the value is stored in
// token.m_attribute.asFloat and PT_FLOAT is returned.
KAbstractObjParserPrivate::token_id KAbstractObjParserPrivate::lexTokenFloat(token_type &token, int sign, int integer)
{
  int scale, fractionSign;
  int fractionDigits = lexReadInteger(&fractionSign, &scale);
  float fractionValue = double(fractionDigits) / scale;
  float result = sign * (integer + fractionValue);

  if (Karma::toLower(peekChar()) != 'e')
  {
    // We've read a float, set token attributes.
    token.m_attribute.asFloat = result;
    return PT_FLOAT;
  }

  // Eat exponent
  nextChar();
  nextChar();
  return lexTokenFloatExponent(token, result);
}
Exemplo n.º 20
0
/* Scans an integer literal into `current`.
 *
 * Sets current.kind to tINT, then consumes decimal digits, folding each one
 * into current.value (value * 10 + digit). Underscores are consumed and
 * ignored. Scanning stops, without consuming, at the first other character.
 * current.value is not reset here. */
static void peekInt(void) {
  current.kind = tINT;

  for (;;) {
    const int c = peekChar();

    if (c >= '0' && c <= '9') {
      consumeChar();
      current.value = current.value * 10 + (c - '0');
    } else if (c == '_') {
      /* digit separator: skipped */
      consumeChar();
    } else {
      return;
    }
  }
}
Exemplo n.º 21
0
    void Scanner::handleStringState()
    {
        loc_ = getTokenLocation();
        // eat ' and NOT update currentChar_
        // because we don't want ' (single quote).
        getNextChar();

        while (true)
        {
            if (currentChar_ == '\'')
            {
                // '''' condition
                // see pascal standard section 6.1.7
                if (peekChar() == '\'')
                {
                    getNextChar();
                }
                // otherwise, we have handle string literal completely.
                else
                {
                    break;
                }
            }

            addToBuffer(currentChar_);
            getNextChar();
        }

        // eat end ' and update currentChar_ .
        getNextChar();

        // just one char
        if (buffer_.length() == 1)
        {
            makeToken(TokenType::CHAR, TokenValue::UNRESERVED, loc_,
                      static_cast<long>(buffer_.at(0)), buffer_);
        }
        else
        {
            makeToken(TokenType::STRING_LITERAL, TokenValue::UNRESERVED,
                      loc_, buffer_, -1);
        }
    }
Exemplo n.º 22
0
// Parses a comma-separated list of simple selectors up to (not including)
// the '{' that opens the declaration block.
//
// Unexpected characters are reported with a debug message. A character that
// could start another selector is left in place, so the next iteration
// parses it as a further selector. Any other character is skipped so that
// the loop always makes progress. Parsing also stops when the input ends.
QVector<SimpleSelector> CSSParser::parseSimpleSelectors(){
    QVector<SimpleSelector> selectors;
    while (true) {
        consumeWhitespaceOrNewline();
        selectors.append(parseSimpleSelector());
        consumeWhitespaceOrNewline();
        // Nothing left to read: there is no '{' to wait for.
        if (eof()) {
            break;
        }
        QChar c = peekChar();
        if (c == ',') {
            consumeChar();
        } else if (c == '{') {
            break;
        } else {
            qDebug() << "Illegal character in selector list";
            // parseSimpleSelector() only consumes input that starts with one
            // of these; for anything else the same character would be seen
            // again forever, so drop it.
            const bool startsSelector =
                (c == '#') || (c == '.') || (c == '*') || isVaidIdentifier(c);
            if (!startsSelector) {
                consumeChar();
            }
        }
    }
    //todo sort selectors by specificity
    return selectors;

}
Exemplo n.º 23
0
	/**
	 * Reads one whitespace-delimited word into buf.
	 *
	 * Leading whitespace is skipped. Characters are then consumed up to the
	 * next whitespace character (which is left unread) or the end of input.
	 * At most size-1 characters are stored; excess characters are consumed
	 * and discarded. The result is null-terminated whenever size > 0.
	 *
	 * @param buf  Destination buffer.
	 * @param size Capacity of buf in bytes.
	 * @return true if at least one character was stored, false if err is set
	 *         or nothing was stored.
	 */
	bool readString( char* buf, int size )
	{
		if ( err )
			return false;

		skipWhitespace();

		int count = 0;
		char ch;
		while ( peekChar(&ch) )
		{
			// isspace() is undefined for negative values, so a plain char
			// must be converted to unsigned char first.
			if ( isspace(static_cast<unsigned char>(ch)) )
				break;
			// keep one byte free for the terminator
			if ( count+1 < size )
				buf[count++] = ch;
			readChar( &ch );
		}
		if ( size > 0 )
			buf[count] = 0;
		return count > 0;
	}
Exemplo n.º 24
0
/* Scans an identifier or keyword into `current`.
 *
 * Lower-case letters and digits are consumed and appended to current.ptr,
 * which is grown with realloc and kept null-terminated. At the first other
 * character (not consumed) the text is compared against the keywords
 * if / else / while / print / fun / return. For a keyword, current.kind is
 * set accordingly and current.ptr is freed and cleared; otherwise
 * current.kind is tID and current.ptr keeps the spelling.
 *
 * Aborts the program if memory for the spelling cannot be obtained. */
static void peekId(void) {
  current.kind = tID;

  int len = 0;

  while (1) {
    const int c = peekChar();
    switch (c) {
      case 'a' ... 'z':
      case '0' ... '9': {
        consumeChar();
        len++;
        /* Grow through a temporary: assigning realloc's result directly
         * would lose (leak) the old buffer on failure and the writes below
         * would dereference a null pointer. */
        void *grown = realloc(current.ptr, len + 1);
        if (!grown) {
          abort();
        }
        current.ptr = grown;
        current.ptr[len - 1] = c;
        current.ptr[len] = 0;
        break;
      }
      default:
        /* No spelling at all (nothing consumed and no buffer): strcmp on a
         * null pointer is undefined, so report a plain tID. */
        if (!current.ptr) {
          current.kind = tID;
          return;
        }
        if (strcmp(current.ptr, "if") == 0) {
          current.kind = tIF;
        } else if (strcmp(current.ptr, "else") == 0) {
          current.kind = tELSE;
        } else if (strcmp(current.ptr, "while") == 0) {
          current.kind = tWHILE;
        } else if (strcmp(current.ptr, "print") == 0) {
          current.kind = tPRINT;
        } else if (strcmp(current.ptr, "fun") == 0) {
          current.kind = tFUN;
        } else if (strcmp(current.ptr, "return") == 0) {
          current.kind = tRETURN;
        } else {
          current.kind = tID;
        }
        /* Keywords do not need their spelling any more. */
        if (current.kind != tID) {
          free(current.ptr);
          current.ptr = 0;
        }
        return;
    }
  }
}
Exemplo n.º 25
0
    /// Scans the fractional part of a number; currentChar_ is the '.' on entry.
    ///
    /// A second '.' directly after the first (as in 4..12) is reported as an
    /// error via errorToken() and errorFlag_, but scanning continues. The
    /// '.' and all following decimal digits are appended to the buffer.
    void Scanner::handleFraction()
    {
        // currentChar_ is . (dot)

        // if we have number 4..12. just simple error condition.
        // our compiler has one big difference compared with
        // commercial compiler, that is about error conditions.
        if (peekChar() == '.')
        {
            errorToken(getTokenLocation().toString() + "Fraction number can not have dot after dot");
            errorFlag_ = true;
        }

        // eat .
        addToBuffer(currentChar_);
        getNextChar();

        // std::isdigit is undefined for negative values, so the character is
        // converted to unsigned char before classification.
        while (std::isdigit(static_cast<unsigned char>(currentChar_)))
        {
            addToBuffer(currentChar_);
            getNextChar();
        }
    }
  // Classifies a preprocessing-number token as a zero, integer or floating
  // literal.
  //
  // Does nothing unless the token's number is PP_NUMBER_TOKEN. Otherwise the
  // token's lexeme is walked character by character through a small state
  // machine. Every accepted character is pushed onto the ident table; when a
  // hex digit or exponent is missing a message is issued and a '0' is pushed
  // in its place. The walk stops at the first character that cannot extend
  // the literal, at which point `kind` is set to ZERO_TOKEN,
  // LITERAL_INT_TOKEN or LITERAL_FLOAT_TOKEN (assumed to be non-zero values,
  // since `kind != 0` is the loop exit test - confirm).
  //
  // Afterwards:
  //  - an integer that contained an 8 or 9 after a leading 0 is reported as
  //    an octal with digits beyond the radix;
  //  - if the rebuilt lexeme differs from the token's lexeme, any characters
  //    left over are reported as an invalid suffix and the token's lexeme is
  //    replaced by the rebuilt one;
  //  - the token's number is set to the determined kind.
  void lexPPNumber (basl::Token & token)
  {
    // do nothing if not a pp-number
    if (token.getNumber () != PP_NUMBER_TOKEN)
    {
      return;
    }

    // lex states
    enum
    {
      // start
      S_START,

      // zero
      S_ZERO,

      // zero followed by X
      S_ZERO_X,

      // octal integer
      S_OCTAL_INT,

      // hex integer
      S_HEX_INT,
    
      // integer
      S_INT,

      // integer with l suffix
      S_INT_L_SUFFIX,

      // integer with u suffix
      S_INT_U_SUFFIX,

      // integer with ul suffix
      S_INT_UL_SUFFIX,

      // integer with ll suffix
      S_INT_LL_SUFFIX,

      // integer with i suffix, 64 next
      S_INT_I_SUFFIX,

      // integer with i6 suffix, 4 next
      S_INT_I6_SUFFIX,

      // integer with ui suffix, 64 next
      S_INT_UI_SUFFIX,

      // integer with ui6 suffix, 4 next
      S_INT_UI6_SUFFIX,

      // integer with suffix
      S_INT_SUFFIX,

      // float
      S_FLOAT,

      // float followed by E
      S_FLOAT_E,

      // float followed by E and sign
      S_FLOAT_E_SIGN,

      // float exponent (number)
      S_FLOAT_EXP,

      // float followed by suffix
      S_FLOAT_SUFFIX,

      // .
      S_DOT
    };

    util::IdentTable & ident_table = util::getIdentTable ();
    util::Loc const & loc = token.getLoc ();
    // cursor into the token's lexeme; advanced once per accepted character
    char const * s = token.getLexeme ().c_str ();
    // true if octal error
    bool octal_error = false;
    // resulting token kind; stays 0 while the literal is still being scanned
    int kind = 0;
    // true if message output while lexing
    // (assigned below but not read anywhere in this function)
    bool error = false;
    int state = S_START;
    for (;;)
    {
      // ch is classified according to the CURRENT state; the state set below
      // describes what has been seen once ch is accepted
      char ch = * s;
      switch (state)
      {
        // start
        case S_START:
        {
          if (ch == '0')
          {
            state = S_ZERO;
          }
          else if (ch >= '1' && ch <= '9')
          {
            state = S_INT;
          }
          else
          {
            // a pp-number that does not start with a digit must start with '.'
            assert (ch == '.');
            state = S_DOT;
          }
          break;
        }
        // zero
        case S_ZERO:
        {
          if (ch == 'x' || ch == 'X')
          {
            state = S_ZERO_X;
          }
          else if (ch == '.')
          {
            state = S_FLOAT;
          }
          else if (ch == 'E' || ch == 'e')
          {
            state = S_FLOAT_E;
          }
          else if (ch == 'L' || ch == 'l')
          {
            state = S_INT_L_SUFFIX;
          }
          else if (ch == 'U' || ch == 'u')
          {
            state = S_INT_U_SUFFIX;
          }
          // 'i' only starts a suffix when the full "i64" is present
          else if (ch == 'i' && peekChar (s, 1) == '6' && peekChar (s, 2) == '4')
          {
            state = S_INT_I_SUFFIX;
          }
          else if (ch >= '0' && ch <= '7')
          {
            state = S_OCTAL_INT;
          }
          else if (ch >= '8' && ch <= '9')
          {
            // can't be an octal, maybe float
            octal_error = true;
            state = S_INT;
          }
          else
          {
            // this is a *zero* int literal token
            kind = ZERO_TOKEN;
          }
          break;
        }
        // zero followed by x
        case S_ZERO_X:
        {
          // && binds tighter than ||, so this is three range tests or'ed together
          if (ch >= '0' && ch <= '9' || ch >= 'a' && ch <= 'f' || ch >= 'A' && ch <= 'F')
          {
            state = S_HEX_INT;
          }
          else
          {
            // missing hex digit, append 0 to make valid
            msg::missingHexDigitsInIntLiteral (loc);
            error = true;
            // try to recover
            ident_table.push ('0');
            if (ch == 'L' || ch == 'l')
            {
              state = S_INT_L_SUFFIX;
            }
            else if (ch == 'U' || ch == 'u')
            {
              state = S_INT_U_SUFFIX;
            }
            else if (ch == 'i' && peekChar (s, 1) == '6' && peekChar (s, 2) == '4')
            {
              state = S_INT_I_SUFFIX;
            }
            else
            {
              kind = LITERAL_INT_TOKEN;
            }
          }
          break;
        }
        // hexadecimal integer
        case S_HEX_INT:
        {
          if (ch == 'L' || ch == 'l')
          {
            state = S_INT_L_SUFFIX;
          }
          else if (ch == 'U' || ch == 'u')
          {
            state = S_INT_U_SUFFIX;
          }
          else if (ch == 'i' && peekChar (s, 1) == '6' && peekChar (s, 2) == '4')
          {
            state = S_INT_I_SUFFIX;
          }
          else if (ch >= '0' && ch <= '9' || ch >= 'a' && ch <= 'f' || ch >= 'A' && ch <= 'F')
          {
            // stay in same state
          }
          else
          {
            kind = LITERAL_INT_TOKEN;
          }
          break;
        }
        // octal integer
        case S_OCTAL_INT:
        {
          if (ch == '.')
          {
            state = S_FLOAT;
          }
          else if (ch == 'E' || ch == 'e')
          {
            state = S_FLOAT_E;
          }
          else if (ch == 'L' || ch == 'l')
          {
            state = S_INT_L_SUFFIX;
          }
          else if (ch == 'U' || ch == 'u')
          {
            state = S_INT_U_SUFFIX;
          }
          else if (ch == 'i' && peekChar (s, 1) == '6' && peekChar (s, 2) == '4')
          {
            state = S_INT_I_SUFFIX;
          }
          else if (ch >= '0' && ch <= '7')
          {
            // stay in same state
          }
          else if (ch >= '8' && ch <= '9')
          {
            // we've started as an octal but now have an 8 or a 9, if this token isn't a float then it's
            // an invalid octal int
            octal_error = true;
            state = S_INT;
          }
          else
          {
            kind = LITERAL_INT_TOKEN;
          }
          break;
        }
        // decimal integer
        case S_INT:
        {
          if (ch == '.')
          {
            state = S_FLOAT;
          }
          else if (ch == 'E' || ch == 'e')
          {
            state = S_FLOAT_E;
          }
          else if (ch == 'L' || ch == 'l')
          {
            state = S_INT_L_SUFFIX;
          }
          else if (ch == 'U' || ch == 'u')
          {
            state = S_INT_U_SUFFIX;
          }
          else if (ch == 'i' && peekChar (s, 1) == '6' && peekChar (s, 2) == '4')
          {
            state = S_INT_I_SUFFIX;
          }
          else if (ch >= '0' && ch <= '9')
          {
            // stay in same state
          }
          else
          {
            kind = LITERAL_INT_TOKEN;
          }
          break;
        }
        // integer with l suffix
        case S_INT_L_SUFFIX:
        {
          if (ch == 'U' || ch == 'u')
          {
            // 'lu' suffix so an int, we'll not allow an 'lul' suffix for 'unsigned long long'
            state = S_INT_SUFFIX;
          }
          else if (ch == 'L' || ch == 'l')
          {
            state = S_INT_LL_SUFFIX;
          }
          else
          {
            kind = LITERAL_INT_TOKEN;
          }
          break;
        }
        // integer with u suffix
        case S_INT_U_SUFFIX:
        {
          if (ch == 'L' || ch == 'l')
          {
            // could still be an 'ull' suffix
            state = S_INT_UL_SUFFIX;
          }
          else if (ch == 'i' && peekChar (s, 1) == '6' && peekChar (s, 2) == '4')
          {
            state = S_INT_UI_SUFFIX;
          }
          else
          {
            kind = LITERAL_INT_TOKEN;
          }
          break;
        }
        // integer with ul suffix
        case S_INT_UL_SUFFIX:
        {
          if (ch == 'L' || ch == 'l')
          {
            state = S_INT_SUFFIX;
          }
          else
          {
            kind = LITERAL_INT_TOKEN;
          }
          break;
        }
        // integer with ll suffix
        case S_INT_LL_SUFFIX:
        {
          if (ch == 'U' || ch == 'u')
          {
            state = S_INT_SUFFIX;
          }
          else
          {
            kind = LITERAL_INT_TOKEN;
          }
          break;
        }
        // integer with i suffix, 64 next
        // (ch is not tested: the lookahead that led here already checked "64")
        case S_INT_I_SUFFIX:
        {
          state = S_INT_I6_SUFFIX;
          break;
        }
        // integer with i6 suffix, 4 next
        // (continues like an 'll' suffix, so a following 'u' is still accepted)
        case S_INT_I6_SUFFIX:
        {
          state = S_INT_LL_SUFFIX;
          break;
        }
        // integer with ui suffix, 64 next
        // (ch is not tested: the lookahead that led here already checked "64")
        case S_INT_UI_SUFFIX:
        {
          state = S_INT_UI6_SUFFIX;
          break;
        }
        // integer with ui6 suffix, 4 next
        case S_INT_UI6_SUFFIX:
        {
          state = S_INT_SUFFIX;
          break;
        }
        // int with suffix
        // (suffix is complete: whatever comes next ends the literal)
        case S_INT_SUFFIX:
        {
          kind = LITERAL_INT_TOKEN;
          break;
        }
        // float
        case S_FLOAT:
        {
          if (ch == 'E' || ch == 'e')
          {
            state = S_FLOAT_E;
          }
          else if (ch == 'F' || ch == 'f' || ch == 'L' || ch == 'l')
          {
            state = S_FLOAT_SUFFIX;
          }
          else if (ch >= '0' && ch <= '9')
          {
            // stay in same state
          }
          else
          {
            kind = LITERAL_FLOAT_TOKEN;
          }
          break;
        }
        // float with e
        case S_FLOAT_E:
        {
          if (ch == '+' || ch == '-')
          {
            state = S_FLOAT_E_SIGN;
          }
          else if (ch >= '0' && ch <= '9')
          {
            state = S_FLOAT_EXP;
          }
          else
          {
            // missing exponent, append 0 to make valid
            msg::missingExponentInFloatingLiteral (loc);
            error = true;
            // try to recover
            ident_table.push ('0');
            if (ch == 'F' || ch == 'f' || ch == 'L' || ch == 'l')
            {
              state = S_FLOAT_SUFFIX;
            }
            else
            {
              kind = LITERAL_FLOAT_TOKEN;
            }
          }
          break;
        }
        // float with e and sign
        case S_FLOAT_E_SIGN:
        {
          if (ch >= '0' && ch <= '9')
          {
            state = S_FLOAT_EXP;
          }
          else
          {
            // missing exponent, append 0 to make valid
            msg::missingExponentInFloatingLiteral (loc);
            error = true;
            // try to recover
            ident_table.push ('0');
            if (ch == 'F' || ch == 'f' || ch == 'L' || ch == 'l')
            {
              state = S_FLOAT_SUFFIX;
            }
            else
            {
              kind = LITERAL_FLOAT_TOKEN;
            }
          }
          break;
        }
        // float exponent
        case S_FLOAT_EXP:
        {
          if (ch == 'F' || ch == 'f' || ch == 'L' || ch == 'l')
          {
            state = S_FLOAT_SUFFIX;
          }
          else if (ch >= '0' && ch <= '9')
          {
            // stay in same state
          }
          else
          {
            kind = LITERAL_FLOAT_TOKEN;
          }
          break;
        }
        // float with suffix
        // (suffix is complete: whatever comes next ends the literal)
        case S_FLOAT_SUFFIX:
        {
          kind = LITERAL_FLOAT_TOKEN;
          break;
        }
        // dot
        case S_DOT:
        {
          // a pp-number starting with '.' must continue with a digit
          assert (ch >= '0' && ch <= '9');
          state = S_FLOAT;
          break;
        }
      }
      // kind was decided: ch is NOT part of the literal, stop without pushing it
      if (kind != 0)
      {
        break;
      }
      // char part of lexeme
      ident_table.push (ch);
      ++ s;
    }

    // warn if octal error
    util::Ident lexeme = ident_table.getIdent ();
    if (kind == LITERAL_INT_TOKEN && octal_error)
    {
      msg::octalHasDigitsBeyondRadix (loc, lexeme);
    }

    // set the lexeme if not the same
    if (lexeme != token.getLexeme ())
    {
      // any chars remaining?
      if (* s != 0)
      {
        if (kind == LITERAL_FLOAT_TOKEN)
        {
          msg::invalidFloatingLiteralSuffix (loc, s, lexeme);
        }
        else
        {
          // otherwise literal int or literal zero token
          msg::invalidIntLiteralSuffix (loc, s, lexeme);
        }
      }
      // change the lexeme so expr will be able to correctly get its value
      token.setLexeme (lexeme);
    }

    // and finally, set its real kind
    token.setNumber (kind);
  }
Exemplo n.º 27
0
static int getToken()
{
    const char tab[] = "abfnrtv";
    const char backTab[] = "\a\b\f\n\r\t\v";
    uint n;
    bool quiet;

    yyIdentLen = 0;
    yyCommentLen = 0;
    yyStringLen = 0;

    while ( yyCh != EOF ) {
        yyLineNo = yyCurLineNo;

        if ( isalpha(yyCh) || yyCh == '_' ) {
            do {
                if ( yyIdentLen < sizeof(yyIdent) - 1 )
                    yyIdent[yyIdentLen++] = (char) yyCh;
                yyCh = getChar();
            } while ( isalnum(yyCh) || yyCh == '_' );
            yyIdent[yyIdentLen] = '\0';

            bool might_be_str = false;

            switch ( yyIdent[0] ) {
                case 'N':
                    if ( strcmp(yyIdent + 1, "one") == 0 )
                        return Tok_None;
                    break;
                case 'Q':
                    if (strcmp(yyIdent + 1, "T_TR_NOOP") == 0)
                    {
                        yyParsingUtf8 = false;
                        return Tok_tr;
                    }
                    else if (strcmp(yyIdent + 1, "T_TR_NOOP_UTF8") == 0)
                    {
                        yyParsingUtf8 = true;
                        return Tok_trUtf8;
                    }
                    else if (strcmp(yyIdent + 1, "T_TRANSLATE_NOOP") == 0)
                    {
                        yyParsingUtf8 = false;
                        return Tok_translate;
                    }
                    break;
                case 'c':
                    if ( strcmp(yyIdent + 1, "lass") == 0 )
                        return Tok_class;
                    break;
                case 'f':
                    /*
                     * QTranslator::findMessage() has the same parameters as
                     * QApplication::translate().
                     */
                    if ( strcmp(yyIdent + 1, "indMessage") == 0 )
                        return Tok_translate;
                    break;
                case 'r':
                    if ( strcmp(yyIdent + 1, "eturn") == 0 )
                        return Tok_return;

                    /* Drop through. */

                case 'R':
                    if (yyIdent[1] == '\0')
                        might_be_str = true;
                    break;
                case 'b':
                case 'B':
                case 'u':
                case 'U':
                    if (yyIdent[1] == '\0')
                        might_be_str = true;
                    else if ((yyIdent[1] == 'r' || yyIdent[1] == 'R') && yyIdent[2] == '\0')
                        might_be_str = true;
                    break;
                case 't':
                    if ( strcmp(yyIdent + 1, "r") == 0 ) {
                        yyParsingUtf8 = false;
                        return Tok_tr;
                    } else if ( qstrcmp(yyIdent + 1, "rUtf8") == 0 ) {
                        yyParsingUtf8 = true;
                        return Tok_trUtf8;
                    } else if ( qstrcmp(yyIdent + 1, "ranslate") == 0 ) {
                        yyParsingUtf8 = false;
                        return Tok_translate;
                    }
                    break;
                case '_':
                    if ( strcmp(yyIdent + 1, "_tr") == 0 ) {
                        yyParsingUtf8 = false;
                        return Tok_tr;
                    } else if ( strcmp(yyIdent + 1, "_trUtf8") == 0 ) {
                        yyParsingUtf8 = true;
                        return Tok_trUtf8;
                    } else if ( qstrcmp(yyIdent + 1, "translate") == 0 ) {
                        yyParsingUtf8 = false;
                        return Tok_translate;
                    }
                    break;
            }

            /*
             * Handle the standard Python v2 and v3 string prefixes by simply
             * ignoring them.
             */

            if (!might_be_str)
                return Tok_Ident;

            if (yyCh != '"' && yyCh != '\'')
                return Tok_Ident;
        }
        {
            switch ( yyCh ) {
                case '#':
                    do {
                        yyCh = getChar();
                    } while ( yyCh != EOF && yyCh != '\n' );
                    break;
                case '"':
                case '\'':
                    int quoteChar;
                    int trippelQuote, singleQuote;
                    int in;

                    quoteChar = yyCh;
                    trippelQuote = 0;
                    singleQuote = 1;
                    in = 0;
                    yyCh = getChar();
                    quiet = false;

                    while ( yyCh != EOF ) {
                        if ( singleQuote && (yyCh == '\n' || (in && yyCh == quoteChar)) )
                            break;

                        if ( yyCh == quoteChar ) {
                            if (peekChar() == quoteChar) {
                                yyCh = getChar();
                                if (!trippelQuote) {
                                    trippelQuote = 1;
                                    singleQuote = 0;
                                    in = 1;
                                    yyCh = getChar();
                                } else {
                                    yyCh = getChar();
                                    if (yyCh == quoteChar) {
                                        trippelQuote = 0;
                                        break;
                                    }
                                }
                            } else if (trippelQuote) {
                                if ( yyStringLen < sizeof(yyString) - 1 )
                                    yyString[yyStringLen++] = (char) yyCh;
                                yyCh = getChar();
                                continue;
                            } else
                                break;
                        } else
                            in = 1;

                        if ( yyCh == '\\' ) {
                            yyCh = getChar();

                            if ( yyCh == 'x' ) {
                                QByteArray hex = "0";

                                yyCh = getChar();
                                while ( isxdigit(yyCh) ) {
                                    hex += (char) yyCh;
                                    yyCh = getChar();
                                }
#if defined(_MSC_VER) && _MSC_VER >= 1400
                                sscanf_s( hex, "%x", &n );
#else
                                sscanf( hex, "%x", &n );
#endif
                                if ( yyStringLen < sizeof(yyString) - 1 )
                                    yyString[yyStringLen++] = (char) n;
                            } else if ( yyCh >= '0' && yyCh < '8' ) {
                                QByteArray oct = "";
                                int n = 0;

                                do {
                                    oct += (char) yyCh;
                                    ++n;
                                    yyCh = getChar();
                                } while ( yyCh >= '0' && yyCh < '8' && n < 3 );
    #if defined(_MSC_VER) && _MSC_VER >= 1400
                                sscanf_s( oct, "%o", &n );
    #else
                                sscanf( oct, "%o", &n );
    #endif
                                if ( yyStringLen < sizeof(yyString) - 1 )
                                    yyString[yyStringLen++] = (char) n;
                            } else if ( yyCh == '\n' ) {
                                yyCh = getChar();
                            } else {
                                const char *p = strchr( tab, yyCh );
                                if ( yyStringLen < sizeof(yyString) - 1 )
                                    yyString[yyStringLen++] = ( p == 0 ) ?
                                            (char) yyCh : backTab[p - tab];
                                yyCh = getChar();
                            }
                        } else {
                            if (!yyCodecForSource) {
                                if ( yyParsingUtf8 && yyCh >= 0x80 && !quiet) {
                                    qWarning( "%s:%d: Non-ASCII character detected in trUtf8 string",
                                              (const char *) yyFileName, yyLineNo );
                                    quiet = true;
                                }
                                // common case: optimized
                                if ( yyStringLen < sizeof(yyString) - 1 )
                                    yyString[yyStringLen++] = (char) yyCh;
                                yyCh = getChar();
                            } else {
                                QByteArray originalBytes;
                                while ( yyCh != EOF && (trippelQuote || yyCh != '\n') && yyCh != quoteChar && yyCh != '\\' ) {
                                    if ( yyParsingUtf8 && yyCh >= 0x80 && !quiet) {
                                        qWarning( "%s:%d: Non-ASCII character detected in trUtf8 string",
                                                (const char *) yyFileName, yyLineNo );
                                        quiet = true;
                                    }
                                    originalBytes += (char)yyCh;
                                    yyCh = getChar();
                                }

                                QString unicodeStr = yyCodecForSource->toUnicode(originalBytes);
                                QByteArray convertedBytes;

                                if (!yyCodecForTr->canEncode(unicodeStr) && !quiet) {
                                    qWarning( "%s:%d: Cannot convert Python string from %s to %s",
                                              (const char *) yyFileName, yyLineNo, yyCodecForSource->name().constData(),
                                              yyCodecForTr->name().constData() );
                                    quiet = true;
                                }
                                convertedBytes = yyCodecForTr->fromUnicode(unicodeStr);

                                size_t len = qMin((size_t)convertedBytes.size(), sizeof(yyString) - yyStringLen - 1);
                                memcpy(yyString + yyStringLen, convertedBytes.constData(), len);
                                yyStringLen += len;
                            }
                        }
                    }
                    yyString[yyStringLen] = '\0';

                    if ( yyCh != quoteChar ) {
                        if (trippelQuote)
                            qWarning("%s:%d: Empty or unterminated triple quoted string",
                                    (const char *)yyFileName, yyLineNo);
                        else
                            qWarning("%s:%d: Unterminated string",
                                    (const char *)yyFileName, yyLineNo);
                    }

                    if ( yyCh == EOF ) {
                        return Tok_Eof;
                    } else {
                        yyCh = getChar();
                        return Tok_String;
                    }
                    break;
                case '(':
                    if (yyParenDepth == 0)
                        yyParenLineNo = yyCurLineNo;
                        yyParenDepth++;
                        yyCh = getChar();
                        return Tok_LeftParen;
                case ')':
                    if (yyParenDepth == 0)
                        yyParenLineNo = yyCurLineNo;
                        yyParenDepth--;
                        yyCh = getChar();
                        return Tok_RightParen;
                case ',':
                    yyCh = getChar();
                    return Tok_Comma;
                case '.':
                    yyCh = getChar();
                    return Tok_Dot;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    {
                        QByteArray ba;
                        ba+=yyCh;
                        yyCh = getChar();
                        bool hex = yyCh == 'x';
                        if ( hex ) {
                            ba+=yyCh;
                            yyCh = getChar();
                        }
                        while ( (hex ? isxdigit(yyCh) : isdigit(yyCh)) ) {
                            ba+=yyCh;
                            yyCh = getChar();
                        }
                        bool ok;
                        yyInteger = ba.toLongLong(&ok);
                        if (ok) return Tok_Integer;
                        break;
                    }
                default:
                    yyCh = getChar();
            }
        }
    }
    return Tok_Eof;
}
Exemplo n.º 28
0
/**
 * Reads the next token from the input.
 *
 * Returns *finish_token when the lexer is already done on entry. Otherwise
 * leading characters accepted by isSpace() are skipped and the first
 * remaining character selects the token:
 *   - '"' starts a quoted string; the text up to the closing quote becomes
 *     the token value. A backslash and the character following it are both
 *     copied verbatim (no unescaping is performed).
 *   - '{', '}', '[', ']', ':' and ',' yield their punctuation token types
 *     with an empty value.
 *   - anything else is collected as a run of isValueLetter() characters and
 *     returned lower-cased as a VALUE token.
 *
 * Throws KevoreeException when the opening quote is the last character of
 * the input.
 */
Token Lexer::nextToken() {
	if (isDone()) {
		return *finish_token;
	}
	int  tokenType = END_OF_FILE;
	char c = nextChar();
	string currentValue="";
	string jsonValue ="";
	while (isDone() != true && isSpace(c)==true) {
		c = nextChar();
	}
	if ('"' == c) {
		tokenType = VALUE;
		if (isDone() != true) {
			c = nextChar();
			while (index < length && c != '"') {
				currentValue += c;
				if (c == '\\' && index < length) {
					// keep the escape sequence as-is: backslash plus escaped char
					c = nextChar();
					currentValue += c;
				}
				c = nextChar();
			}
			// NOTE(review): if the input ends before a closing quote the loop
			// simply stops and the partial text is returned - confirm whether
			// that should raise like the branch below.
			jsonValue = currentValue;
		} else {
			throw  KevoreeException("Lexer Unterminated string nextToken");
		}
	}else if ('{' == c) {
		tokenType = LEFT_BRACE;
	} else if ('}' == c) {
		tokenType = RIGHT_BRACE;
	} else if ('[' == c) {
		tokenType = LEFT_BRACKET;
	} else if (']' == c) {
		tokenType = RIGHT_BRACKET;
	} else if (':' == c) {
		tokenType = COLON;
	} else if (',' == c) {
		tokenType = COMMA;
	} else if (! isDone()) {
		// tolower() has undefined behaviour for negative arguments other than
		// EOF, so plain chars are widened through unsigned char first.
		while (isValueLetter(tolower(static_cast<unsigned char>(c))) == true)
		{
			currentValue += c;
			// NOTE(review): peekChar()'s return type is not visible here; if it
			// returns plain char it needs the same unsigned char widening.
			if (isValueLetter(tolower(peekChar())) != true) {
				break;
			} else {
				c = nextChar();
			}
		}
		// Lower-case the collected literal; "true"/"false" therefore come out
		// in canonical lower-case form like every other bare value.
		string v = currentValue;
		for (string::size_type i = 0; i < v.size(); ++i) {
			v[i] = static_cast<char>(tolower(static_cast<unsigned char>(v[i])));
		}
		jsonValue = v;
		tokenType = VALUE;
	} else {
		tokenType = END_OF_FILE;
	}
	return Token(tokenType, jsonValue);
}
Exemplo n.º 29
0
/**
 * Scans the next token starting at the current character.
 *
 * Whitespace and comments are skipped first via consumeSpace(); if that
 * reports end of input, FlowToken::Eof is stored in token_ and returned.
 * Otherwise content_ is reset to the current character, the location
 * bookkeeping (lastPos_, lastLocation_, currLocation_.begin) is updated and
 * the token is selected from the current character, looking at the following
 * character(s) for multi-character operators.
 *
 * A '/' is lexed as the start of a regular expression literal when the
 * previously returned token was an identifier or an operator, and as the
 * division operator otherwise.
 *
 * \return the recognised token; every branch handled here also stores it in
 *         token_, except the numeric branch which returns parseNumber()'s
 *         result directly (presumably parseNumber() assigns token_ itself -
 *         TODO confirm).
 */
FlowToken FlowLexer::nextToken()
{
	// Must be evaluated before token_ is overwritten: decides how '/' is lexed.
	bool expectsValue = token() == FlowToken::Ident || FlowTokenTraits::isOperator(token());

	lastPos_ = currPos_;

	if (consumeSpace())
		return token_ = FlowToken::Eof;

//	printf("FlowLexer.nextToken: currentChar %s curr[%zu:%zu.%zu] next[%zu:%zu.%zu]\n",
//		escape(currentChar_).c_str(),
//		currPos_.line, currPos_.column, currPos_.offset,
//		nextPos_.line, nextPos_.column, nextPos_.offset);

	content_.clear();
	content_ += static_cast<char>(currentChar_);
	lastLocation_ = currLocation_;
	currLocation_.begin = currPos_;

	switch (currentChar_) {
	case EOF: // (-1)
		return token_ = FlowToken::Eof;
	case '=':
		switch (nextChar()) {
		case '=':
			nextChar();
			return token_ = FlowToken::Equal;
		case '^':
			nextChar();
			return token_ = FlowToken::PrefixMatch;
		case '$':
			nextChar();
			return token_ = FlowToken::SuffixMatch;
		case '~':
			nextChar();
			return token_ = FlowToken::RegexMatch;
		case '>':
			nextChar();
			return token_ = FlowToken::HashRocket;
		default:
			return token_ = FlowToken::Assign;
		}
	case '<':
		switch (nextChar()) {
		case '<':
			nextChar();
			return token_ = FlowToken::Shl;
		case '=':
			nextChar();
			return token_ = FlowToken::LessOrEqual;
		default:
			return token_ = FlowToken::Less;
		}
	case '>':
		switch (nextChar()) {
		case '>':
			nextChar();
			return token_ = FlowToken::Shr;
		case '=':
			nextChar();
			return token_ = FlowToken::GreaterOrEqual;
		default:
			return token_ = FlowToken::Greater;
		}
	case '|':
		switch (nextChar()) {
		case '|':
			nextChar();
			return token_ = FlowToken::Or;
		case '=':
			nextChar();
			return token_ = FlowToken::OrAssign;
		default:
			return token_ = FlowToken::BitOr;
		}
	case '&':
		switch (nextChar()) {
		case '&':
			nextChar();
			return token_ = FlowToken::And;
		case '=':
			nextChar();
			return token_ = FlowToken::AndAssign;
		default:
			return token_ = FlowToken::BitAnd;
		}
	case '.':
		if (nextChar() == '.') {
			if (nextChar() == '.') {
				nextChar();
				return token_ = FlowToken::Ellipsis;
			}
			return token_ = FlowToken::DblPeriod;
		}
		return token_ = FlowToken::Period;
	case ':':
		if (peekChar() == ':') {
			// "::" can only be the start of an IPv6 literal
			stringValue_.clear();
			return continueParseIPv6(false);
		} else {
			nextChar();
			return token_ = FlowToken::Colon;
		}
	case ';':
		nextChar();
		return token_ = FlowToken::Semicolon;
	case ',':
		nextChar();
		return token_ = FlowToken::Comma;
	case '{':
		nextChar();
		return token_ = FlowToken::Begin;
	case '}':
		if (interpolationDepth_) {
			// closing brace of an interpolated expression inside a "..." string
			return token_ = parseInterpolationFragment(false);
		} else {
			nextChar();
			return token_ = FlowToken::End;
		}
	case '(':
		nextChar();
		return token_ = FlowToken::RndOpen;
	case ')':
		nextChar();
		return token_ = FlowToken::RndClose;
	case '[':
		nextChar();
		return token_ = FlowToken::BrOpen;
	case ']':
		nextChar();
		return token_ = FlowToken::BrClose;
	case '+':
		nextChar();
		return token_ = FlowToken::Plus;
	case '-':
		nextChar();
		return token_ = FlowToken::Minus;
	case '*':
		switch (nextChar()) {
		case '*':
			// Consume the second '*' only. Recursing into nextToken() here
			// would lex (and discard) a further token and clobber the
			// position/content bookkeeping set up above.
			nextChar();
			return token_ = FlowToken::Pow;
		default:
			return token_ = FlowToken::Mul;
		}
	case '/':
		if (expectsValue)
			return token_ = parseString('/', FlowToken::RegExp);

		nextChar();
		return token_ = FlowToken::Div;
	case '%':
		nextChar();
		return token_ = FlowToken::Mod;
	case '!':
		switch (nextChar()) {
		case '=':
			nextChar();
			return token_ = FlowToken::UnEqual;
		default:
			return token_ = FlowToken::Not;
		}
	case '\'':
		return token_ = parseString(true);
	case '"':
		++interpolationDepth_;
		return token_ = parseInterpolationFragment(true);
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	case '8':
	case '9':
		return parseNumber();
	default:
		if (std::isalpha(currentChar()) || currentChar() == '_')
			return token_ = parseIdent();

		// Unknown input: report it, skip the character and hand back Unknown.
		if (std::isprint(currentChar()))
			printf("lexer: unknown char %c (0x%02X)\n", currentChar(), currentChar());
		else
			printf("lexer: unknown char %u (0x%02X)\n", currentChar() & 0xFF, currentChar() & 0xFF);

		nextChar();
		return token_ = FlowToken::Unknown;
	}
}
Exemplo n.º 30
0
/**
 * Skips whitespace, '#' line comments and C-style block comments, repeating
 * until the current character starts something else.
 *
 * Bytes that are neither whitespace nor printable are reported on stderr and
 * skipped. When the input ends inside a comment, token_ is set to
 * FlowToken::Eof before returning.
 *
 * \retval true abort tokenizing in caller
 * \retval false continue tokenizing in caller
 */
bool FlowLexer::consumeSpace()
{
	// Iterate instead of recursing after each comment so that long runs of
	// comments do not grow the call stack.
	for (;;) {
		// skip spaces
		for (;; nextChar()) {
			if (eof())
				return true;

			if (std::isspace(currentChar_))
				continue;

			if (std::isprint(currentChar_))
				break;

			// TODO proper error reporting through API callback
			std::fprintf(stderr, "%s[%04zu:%02zu]: invalid byte %d (0x%02X)\n",
					currLocation_.fileName.c_str(), nextPos_.line, nextPos_.column,
					currentChar() & 0xFF, currentChar() & 0xFF);
		}

		if (currentChar() == '#') {
			// skip chars until EOL
			for (;;) {
				if (eof()) {
					token_ = FlowToken::Eof;
					return true;
				}

				if (currentChar() == '\n') {
					nextChar();
					break;
				}

				nextChar();
			}

			continue; // more whitespace or comments may follow
		}

		if (currentChar() == '/' && peekChar() == '*') { // "/*" ... "*/"
			// Skip both opener characters; otherwise the opening '*' could
			// pair with a directly following '/' and "/*/" would be taken
			// for a complete comment.
			nextChar(); // skip '/'
			nextChar(); // skip '*'

			for (;;) {
				if (eof()) {
					token_ = FlowToken::Eof;
					// reportError(Error::UnexpectedEof);
					return true;
				}

				if (currentChar() == '*' && peekChar() == '/') {
					nextChar(); // skip '*'
					nextChar(); // skip '/'
					break;
				}

				nextChar();
			}

			continue; // more whitespace or comments may follow
		}

		return false;
	}
}