Пример #1
0
// Consumes the rest of a numeric literal from m_src and returns it as a
// Format_Number token built from m_src.anchor() and m_src.length().
//
// - At end of input the token is returned immediately.
// - If the next character is 'x' or 'X', a hexadecimal literal is scanned:
//   hex digits, an optional '.' followed by hex digits, and an optional
//   'p'/'P' exponent with an optional sign and decimal digits.
// - If the next character is '.' or a decimal digit, scanning is delegated
//   to readFloatNumber() and its result is returned.
// - Otherwise nothing more is consumed.
FormatToken Scanner::readNumber()
{
	// Nothing left to look at: the token ends where the input ends.
	if(m_src.isEnd())
		return FormatToken(Format_Number, m_src.anchor(), m_src.length());

	if(!(m_src.peek().toLower() == QLatin1Char('x')))
	{
		// No hex prefix; a '.' or a digit means the float reader takes over.
		const bool continuesAsFloat =
			m_src.peek() == QLatin1Char('.') || m_src.peek().isDigit();
		if(continuesAsFloat)
			return readFloatNumber();
		return FormatToken(Format_Number, m_src.anchor(), m_src.length());
	}

	// Hexadecimal literal: step over the 'x', then the integer digits.
	for(m_src.move(); isHexDigit(m_src.peek()); m_src.move())
	{
	}

	// Optional fractional part.
	if(m_src.peek() == QLatin1Char('.'))
	{
		for(m_src.move(); isHexDigit(m_src.peek()); m_src.move())
		{
		}
	}

	// Optional binary exponent: 'p', optional sign, decimal digits.
	if(m_src.peek().toLower() == QLatin1Char('p'))
	{
		m_src.move();
		const bool hasSign =
			(m_src.peek() == QLatin1Char('+')) || (m_src.peek() == QLatin1Char('-'));
		if(hasSign)
			m_src.move();
		while(m_src.peek().isDigit())
			m_src.move();
	}

	return FormatToken(Format_Number, m_src.anchor(), m_src.length());
}
Пример #2
0
/*
 * URL-decode a string the same way URLDecoder.java in j2se does, so that
 * encoding/decoding stays consistent with IS.
 *
 *   '+'    becomes a single space,
 *   "%XY"  (X and Y hex digits) becomes the character with that hex value,
 *   any other character is copied through unchanged.
 *
 * Throws std::invalid_argument when a '%' is not followed by two hex
 * digits; other std::exception's may propagate from std::string.
 */
std::string Http::decode(const std::string& encodedString)
{
    const std::size_t inputLen = encodedString.size();
    const char *input = encodedString.c_str();
    std::string decodedString;

    // Plain characters are not appended one at a time.  The current run
    // of them is tracked as [runStart, runStart + runLen) and appended in
    // one call when a '+' or '%' interrupts it, or at the end of input.
    std::size_t runStart = 0;
    std::size_t runLen = 0;

    // The decoded text is never longer than the encoded text.
    decodedString.reserve(inputLen);

    std::size_t pos = 0;
    while (pos < inputLen) {
        const char current = input[pos];

        // Ordinary character: extend (or start) the pending run.
        if (current != '+' && current != '%') {
            if (runLen == 0)
                runStart = pos;
            ++runLen;
            ++pos;
            continue;
        }

        // A special character ends the pending run; flush it first.
        if (runLen != 0) {
            decodedString.append(input + runStart, runLen);
            runLen = 0;
        }

        if (current == '+') {
            PUSH_BACK_CHAR(decodedString, ' ');
            ++pos;
            continue;
        }

        // '%' must be followed by exactly two hex digits.
        const bool wellFormed = pos + 2 < inputLen &&
                                isHexDigit(input[pos + 1]) &&
                                isHexDigit(input[pos + 2]);
        if (!wellFormed) {
            throw std::invalid_argument(
                    "Http::decode() invalid %-escapes in " +
                    encodedString);
        }

        unsigned int value = convertHexDigit(input[pos + 1]);
        value = (value * 0x10) + convertHexDigit(input[pos + 2]);
        PUSH_BACK_CHAR(decodedString, static_cast<char>(value));
        pos += 3;
    }

    // Flush whatever run was still pending when the input ended.
    if (runLen != 0) {
        decodedString.append(input + runStart, runLen);
    }

    return decodedString;
}
//! \brief Decodes the two hex characters at \a inIndex and \a inIndex + 1 of
//!		\a inString into a single byte value.
//!
//! \param inString Text that contains the hex characters.
//! \param inIndex Index of the high-nibble character; the low-nibble
//!		character is taken from \a inIndex + 1.
//! \return The high nibble shifted left by four, OR-ed with the low nibble.
//!
//! \exception StSRecordParseException is thrown if the two characters do not
//!		both lie inside \a inString, or if either of the nibble characters
//!		is not a valid hex digit.
int StSRecordFile::readHexByte(std::string &inString, int inIndex)
{
    // Both nibble characters must exist. Indexing a std::string beyond its
    // length is undefined behaviour, so reject short input up front with
    // the same exception callers already handle for malformed digits.
    if (inIndex < 0 ||
        static_cast<std::string::size_type>(inIndex) + 1 >= inString.size())
    {
        throw StSRecordParseException("invalid hex digit");
    }

    const char nibbleCharHi = inString[inIndex];
    const char nibbleCharLo = inString[inIndex + 1];

    // must be hex digits
    if (!(isHexDigit(nibbleCharHi) && isHexDigit(nibbleCharLo)))
    {
        throw StSRecordParseException("invalid hex digit");
    }

    return (hexDigitToInt(nibbleCharHi) << 4) | hexDigitToInt(nibbleCharLo);
}
Пример #4
0
 // Parser state that handles the characters of a %XX escape inside a URL.
 //
 // A non-hex character logs a warning and switches to state_error.
 // A hex digit is appended to the token unless the token already ends in
 // '%' plus one character; in that case the pair is decoded, the "%X"
 // tail of the token is replaced by the decoded character, and parsing
 // returns to state_url.
 void HeaderParser::state_urlesc(char ch)
 {
     if (!isHexDigit(ch))
     {
         log_warn("invalid hex digit " << chartoprint(ch) << " in url");
         state = &HeaderParser::state_error;
         return;
     }

     // The escape is complete once the token ends in '%' followed by one
     // earlier character; until then just collect the digit.
     const bool completesEscape =
         token.size() >= 2 && token[token.size() - 2] == '%';
     if (!completesEscape)
     {
         token += ch;
         return;
     }

     // Combine the stored high nibble with the incoming low nibble, write
     // the result over the '%', and drop the now redundant stored digit.
     unsigned v = (valueOfHexDigit(token[token.size() - 1]) << 4) | valueOfHexDigit(ch);
     token[token.size() - 2] = static_cast<char>(v);
     token.resize(token.size() - 1);
     state = &HeaderParser::state_url;
 }
Пример #5
0
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::uuid
//   Matches a complete or right-truncated UUID at the cursor:
//
//   XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
//   XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX
//   ...
//   XXXXXXXX-
//   XXXXXXXX
//
//   Up to 36 characters are compared against uuid_pattern: where the pattern
//   holds 'x' the text must hold a hex digit, anywhere else the text must
//   equal the pattern character. At least uuid_min_length characters must
//   match, and a match for which isAllDigits() is true is rejected.
//
//   On success 'token' receives the matched text, 'type' becomes
//   Lexer::Type::uuid and the cursor moves past the match. Note that 'token'
//   is also overwritten when an all-digit match is rejected. This overload
//   does not inspect the character that follows the match.
bool Lexer::isUUID (std::string& token, Lexer::Type& type)
{
  const std::size_t start = _cursor;

  // Count how many leading characters agree with the pattern.
  std::size_t matched = 0;
  while (matched < 36 && start + matched < _eos)
  {
    bool agrees;
    if (uuid_pattern[matched] == 'x')
      agrees = isHexDigit (_text[start + matched]);
    else
      agrees = (uuid_pattern[matched] == _text[start + matched]);

    if (! agrees)
      break;

    ++matched;
  }

  // Too short to be even a truncated UUID.
  if (matched < uuid_min_length)
    return false;

  token = _text.substr (_cursor, matched);

  // A run made only of digits is not accepted as a UUID.
  if (isAllDigits (token))
    return false;

  type = Lexer::Type::uuid;
  _cursor += matched;
  return true;
}
Пример #6
0
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::uuid
//   Matches a complete or right-truncated UUID at the cursor:
//
//   XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
//   XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX
//   ...
//   XXXXXXXX-
//   XXXXXXXX
//
//   Up to 36 characters are compared against uuid_pattern ('x' in the pattern
//   requires a hex digit, any other pattern character must match exactly),
//   and at least uuid_min_length of them must match.
//
//   When 'endBoundary' is true the match must additionally be followed by
//   EOS (a zero character), whitespace, or a single character operator.
//
//   On success 'token' receives the matched text, 'type' becomes
//   Lexer::Type::uuid and the cursor moves past the match.
bool Lexer::isUUID (std::string& token, Lexer::Type& type, bool endBoundary)
{
  const std::size_t start = _cursor;

  // Greedy: take as many pattern-conforming characters as are available.
  std::size_t matched = 0;
  while (matched < 36 && start + matched < _eos)
  {
    bool agrees;
    if (uuid_pattern[matched] == 'x')
      agrees = isHexDigit (_text[start + matched]);
    else
      agrees = (uuid_pattern[matched] == _text[start + matched]);

    if (! agrees)
      break;

    ++matched;
  }

  // Too short to be even a truncated UUID.
  if (matched < uuid_min_length)
    return false;

  // With boundary checking on, whatever follows must terminate the token.
  if (endBoundary                                     &&
      _text[start + matched]                          &&
      ! isWhitespace (_text[start + matched])         &&
      ! isSingleCharOperator (_text[start + matched]))
    return false;

  token = _text.substr (_cursor, matched);
  type = Lexer::Type::uuid;
  _cursor += matched;
  return true;
}
Пример #7
0
// Copies the run of digit / hex-digit characters at the current input
// position into the lexer's output via putChar(), consuming each one with
// skipChar(). Stops at the first character that is neither.
void DefaultLexer::readHexInteger() {
  for (char c = lookChar(); isDigit(c) || isHexDigit(c); c = lookChar()) {
    putChar(c);
    skipChar();
  }
}
Пример #8
0
/* Returns 1 when every character of str is accepted by isHexDigit(), and 0
 * as soon as one character is not.  A null pointer and an empty string
 * both yield 1, because no offending character is found.
 */
int isAddress(char *str)
{
	char *cursor;

	/* Nothing to inspect: treated as valid. */
	if (!str)
		return 1;

	for (cursor = str; *cursor != 0; ++cursor)
		if (isHexDigit(*cursor) == 0)
			return 0;

	return 1;
}
Пример #9
0
/* getChar(is)
 * This is one possible character input routine for an input stream.
 * (This version uses the standard input stream.)
 * getChar places next 8-bit character into is->nextChar.
 * It also updates the count of number of 8-bit characters read.
 * The value EOF is obtained when no more input is available.  
 * This code handles 4-bit/6-bit/8-bit channels.
 */
void getChar(sexpInputStream *is)
{
    int c;
    /* Once EOF has been stored in nextChar, do not read again; just make
       sure the stream is back in 8-bit mode. */
    if (is->nextChar == EOF) {
	is->byteSize = 8;
	return;
    }
    /* Keep reading raw characters until one of the branches below returns:
       EOF, a region terminator, a plain 8-bit character, or a full byte
       assembled from 4-bit / 6-bit digits. */
    while (TRUE) {
	c = is->nextChar = fgetc(is->inputFile);
	if (c == EOF)
	    return;
	if ((is->byteSize == 6 && (c == '|' || c == '}'))
	    || (is->byteSize == 4 && (c == '#')))
	    /* end of region reached; return terminating character, after
	       checking for unused bits */
	{
	    /* Warn only when the left-over bits are not all zero. */
	    if (is->nBits > 0 && (((1 << is->nBits) - 1) & is->bits) != 0)
		ErrorMessage(WARNING,
			     "%d-bit region ended with %d unused bits left-over",
			     is->byteSize, is->nBits);
	    changeInputByteSize(is, 8);
	    return;
	} else if (is->byteSize != 8 && isWhiteSpace(c))
	    ;	/* ignore white space in hex and base64 regions */
	else if (is->byteSize == 6 && c == '=')
	    ;	/* ignore equals signs in base64 regions */
	else if (is->byteSize == 8) {
	    /* Plain 8-bit mode: the character just read is the result. */
	    is->count++;
	    return;
	} else if (is->byteSize < 8) {
	    /* Make room for one more digit in the bit accumulator and merge
	       the digit's value into it. */
	    is->bits = is->bits << is->byteSize;
	    is->nBits += is->byteSize;
	    if (is->byteSize == 6 && isBase64Digit(c))
		is->bits = is->bits | base64value[c];
	    else if (is->byteSize == 4 && isHexDigit(c))
		is->bits = is->bits | hexvalue[c];
	    else
		ErrorMessage(ERROR,
			     "character %c found in %d-bit coding region",
			     (int) is->nextChar, is->byteSize);
	    /* Eight or more bits are pending: hand out the oldest eight as
	       the next character and keep the remainder for later. */
	    if (is->nBits >= 8) {
		is->nextChar = (is->bits >> (is->nBits - 8)) & 0xFF;
		is->nBits -= 8;
		is->count++;
		return;
	    }
	}
Пример #10
0
// Reads the digits of a hexadecimal literal (the scanner is positioned on
// the 'x' of "0x...") and stores the parsed value in lval.
//
// At most MaxBufLen - 1 digits are collected. A longer literal trips the
// assert in debug builds; in builds without asserts the value is taken
// from the first MaxBufLen - 1 digits and the remaining digits are left in
// the input.
void Scanner::readHex( YYSTYPE & lval )
//-------------------------------------
{
    const int   MaxBufLen = 10;
    char        buffer[ MaxBufLen ];
    int         bufPos;         // position in buffer

    get();  // move past x (ie. 0x7f)

    // Stop one short of the buffer size so that the terminating '\0'
    // written below always lands inside the array.
    for( bufPos = 0; bufPos < MaxBufLen - 1; bufPos += 1 ) {
        if( isEOF() || !isHexDigit() ) break;
        buffer[ bufPos ] = (char) _current;
        get();
    }

    // The literal must have ended by now; another hex digit here means it
    // did not fit into the buffer.
    assert( isEOF() || !isHexDigit() );

    buffer[ bufPos ] = '\0';

    // The buffer holds only hex digits, so the value is never negative.
    // strtoul keeps values above LONG_MAX (e.g. 0xFFFFFFFF where long is
    // 32 bits wide) from being clamped the way strtol would clamp them.
    lval = (YYSTYPE) strtoul( buffer, NULL, 16 );
}
    /** Tries to read a hexadecimal literal from the iterator.

        The literal must start with '0', then 'x' or 'X', then at least one
        character accepted by isHexDigit(). If all of that is present, the
        result of skipNumberSuffix() is returned; otherwise false.
        Characters read before a mismatch was detected stay consumed.
    */
    bool parseHexLiteral (CodeDocument::Iterator& source) noexcept
    {
        if (source.nextChar() != '0')
            return false;

        const juce_wchar prefix = source.nextChar();
        if (! (prefix == 'x' || prefix == 'X'))
            return false;

        // Step over every hex digit, remembering whether there was any.
        bool sawDigit = false;
        for (; isHexDigit (source.peekNextChar()); source.skip())
            sawDigit = true;

        if (! sawDigit)
            return false;

        return skipNumberSuffix (source);
    }
Пример #12
0
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::hex
//   0xX+
//
//   Matches a lower-case "0x" prefix followed by one or more hex digits at
//   the cursor. On success 'token' receives the whole literal including the
//   prefix, 'type' becomes Lexer::Type::hex and the cursor moves past it.
//   On failure nothing is modified.
bool Lexer::isHexNumber (std::string& token, Lexer::Type& type)
{
  const std::size_t start = _cursor;

  // Need room for the prefix plus one digit, and the prefix itself.
  if (_eos - start < 3        ||
      _text[start + 0] != '0' ||
      _text[start + 1] != 'x')
    return false;

  // Walk over the digits that follow the prefix.
  std::size_t end = start + 2;
  while (isHexDigit (_text[end]))
    ++end;

  // A bare "0x" without digits is not a hex number.
  if (end - _cursor <= 2)
    return false;

  token = _text.substr (_cursor, end - _cursor);
  type = Lexer::Type::hex;
  _cursor = end;
  return true;
}
Пример #13
0
// Consumes the rest of a numeric literal from m_src and returns it as a
// Token::Number built from m_src.anchor() and m_src.length().
//
// The next character selects the radix, case-insensitively: 'b' scans binary
// digits, 'o' octal digits and 'x' hex digits. Any other character hands
// the work to readFloatNumber(), whose result is returned directly. After a
// radix-prefixed literal, one character accepted by isValidIntegerSuffix()
// is consumed as well.
Token Scanner::readNumber()
{
    // Input exhausted: the token is whatever has been anchored so far.
    if (m_src.isEnd())
        return Token(Token::Number, m_src.anchor(), m_src.length());

    const QChar prefix = m_src.peek().toLower();

    if (prefix == 'b') {
        for (m_src.move(); isBinaryDigit(m_src.peek()); m_src.move()) {
        }
    } else if (prefix == 'o') {
        for (m_src.move(); isOctalDigit(m_src.peek()); m_src.move()) {
        }
    } else if (prefix == 'x') {
        for (m_src.move(); isHexDigit(m_src.peek()); m_src.move()) {
        }
    } else {
        // No radix prefix: either an integer or a float number.
        return readFloatNumber();
    }

    // Optional single-character integer suffix.
    if (isValidIntegerSuffix(m_src.peek()))
        m_src.move();

    return Token(Token::Number, m_src.anchor(), m_src.length());
}
Пример #14
0
	// Reads the body of a JSON string from `input`, decoding escape sequences
	// as it goes, and stores the decoded text in `result` when the closing
	// '"' is reached.
	//
	// Bytes with the high bit set are copied through unchanged, a backslash
	// introduces an escape (including \uXXXX, which is re-encoded through
	// Convert::encodeToUTF8), and every other character is copied as is.
	void Value::readString(std::istream &input, std::string &result) {
		// `noErrors` doubles as the loop-termination flag: it is set to false
		// when the closing quotation mark has been read.
		bool noErrors = true, noUnicodeError = true;
		char currentCharacter, tmpCharacter;
		std::stringstream constructing;
		std::string tmpStr(4, ' ');
		std::stringstream tmpSs;
		int32_t tmpInt;
		String32 tmpStr32;
		unsigned int tmpCounter;

		// As long as there aren't any errors and that we haven't reached the
		// end of the input stream.
		while (noErrors && !input.eof()) {
			input.get(currentCharacter);

			if (input.good()) {
				if (currentCharacter & 0x80) { // 0x80 --> 10000000
					// The character is part of an utf8 character.
					constructing << currentCharacter;

				} else if (currentCharacter == Strings::Json::Escape::BEGIN_ESCAPE) {
					// NOTE(review): the result of this get() is not checked;
					// if it fails, tmpCharacter keeps its previous value.
					if (!input.eof()) {
						input.get(tmpCharacter);

						// Map the character after the escape marker to the
						// character it stands for.
						switch (tmpCharacter) {
						case Strings::Json::Escape::QUOTATION_MARK:
							constructing << Strings::Std::QUOTATION_MARK;
							break;

						case Strings::Json::Escape::REVERSE_SOLIDUS:
							constructing << Strings::Std::REVERSE_SOLIDUS;
							break;

						case Strings::Json::Escape::SOLIDUS:
							constructing << Strings::Std::SOLIDUS;
							break;

						case Strings::Json::Escape::BACKSPACE:
							constructing << Strings::Std::BACKSPACE;
							break;

						case Strings::Json::Escape::FORM_FEED:
							constructing << Strings::Std::FORM_FEED;
							break;

						case Strings::Json::Escape::LINE_FEED:
							constructing << Strings::Std::LINE_FEED;
							break;

						case Strings::Json::Escape::CARRIAGE_RETURN:
							constructing << Strings::Std::CARRIAGE_RETURN;
							break;

						case Strings::Json::Escape::TAB:
							constructing << Strings::Std::TAB;
							break;

						case Strings::Json::Escape::BEGIN_UNICODE:
							// TODO: Check for utf16 surrogate pairs.
							tmpCounter = 0;
							tmpStr.clear();
							tmpStr = "    ";
							noUnicodeError = true;

							// Read up to four characters; all four are
							// consumed even when one of them is not a hex
							// digit, in which case the escape is dropped.
							while (tmpCounter < 4 && !input.eof()) {
								input.get(tmpCharacter);

								if (isHexDigit(tmpCharacter)) {
									tmpStr[tmpCounter] = tmpCharacter;

								} else {
									noUnicodeError = false;
									std::cout << "Invalid \\u character, skipping it." << std::endl;
								}

								++tmpCounter;
							}

							// Parse the collected digits as a hexadecimal
							// number and append its UTF-8 encoding.
							// NOTE(review): str("") replaces the buffer but
							// does not reset the stream's state flags; confirm
							// that a second \u escape still parses, or whether
							// tmpSs.clear() is needed here.
							if (noUnicodeError) {
								tmpSs.str("");
								tmpSs << std::hex << tmpStr;
								tmpSs >> tmpInt;
								tmpStr32.clear();
								tmpStr32.push_back(tmpInt);
								tmpStr = Convert::encodeToUTF8(tmpStr32);
								constructing << tmpStr;
							}

							break;

						default:
							// Unknown escapes are silently dropped.
							break;
						}
					}

				} else if (currentCharacter == '"') {
					// Closing quote: publish the decoded text and stop.
					result = constructing.str();
					noErrors = false;

				} else {
					constructing << currentCharacter;
				}
			}
Пример #15
0
/* Extracts the next token from buffer, beginning at offset start, into
 * outBuffer.
 *
 * buffer       NUL-terminated input text.
 * start        offset in buffer at which scanning begins.
 * outBuffer    receives the token bytes; its capacity is not checked here.
 * escapeChars  when true, backslash escapes are decoded and double quotes
 *              toggle a mode in which spaces and newlines do not end the
 *              token; when false, characters are copied verbatim.
 * nextChar     receives start plus the number of input positions stepped
 *              over.
 *
 * Leading spaces are skipped.  The token ends at a space, a newline or a
 * NUL (outside quotes), or at an unrecognised escape.
 *
 * Returns the number of bytes written to outBuffer.
 *
 * NOTE(review): in escapeChars mode outBuffer is NUL-terminated only when a
 * delimiter ends the token; the bad-escape path and a loop that simply runs
 * out do not terminate it.
 */
int	GetToken(unsigned char* buffer,int start,unsigned char* outBuffer,bool escapeChars,int *nextChar)
{
	int				count,outcount = 0;
	bool			finished(false),quotesOn(false);
	unsigned char	temp;	/* NOTE(review): never used */
	
	/* NOTE(review): the bound is the length of the whole buffer, not of the
	   part after start, and strlen() is re-evaluated on every iteration. */
	for (count=0;count<strlen((const char*)buffer) && !finished;count++)
	{
		if (escapeChars)
		{
			
			if (buffer[start+count] == '\\')
			{
				/* step onto the character that selects the escape */
				count++;
				
				switch(buffer[start+count])
				{
					case 'n':
						outBuffer[outcount++] = 0x0a;
						break;
					case 't':
						outBuffer[outcount++] = 0x09;
						break;
					case 'v':
						outBuffer[outcount++] = 0x0b;
						break;
					case 'b':
						outBuffer[outcount++] = 0x08;
						break;
					case 'r':
						outBuffer[outcount++] = 0x0d;
						break;
					case 'f':
						outBuffer[outcount++] = 0x0c;
						break;
					case 'a':
						outBuffer[outcount++] = 0x07;
						break;
					case '\\':
						outBuffer[outcount++] = '\\';
						break;
					case '\"':
						outBuffer[outcount++] = '\"';
						break;

					case ' ':
						/* remove leading spaces */
						if (outcount != 0)
							outBuffer[outcount++] = ' ';
						break;
						
					case 'x':
						/* do hex conversion; the two following characters
						   are skipped even when they are not hex digits,
						   in which case nothing is written */
						if (isHexDigit(buffer[start+count+1]) && isHexDigit(buffer[start+count+2]))
						{
							outBuffer[outcount++] = toHex(buffer[start+count+1],buffer[start+count+2]);
						}
						count += 2;
						break;

					case '0':
						/* "\0" not followed by a digit is a literal NUL;
						   otherwise fall through to the octal case below */
						if (!isdigit(buffer[start+count+1]))
						{
							outBuffer[outcount++] = '\0';
							break;
						}
					case '1':
					case '2':
					case '3':
					case '4':
					case '5':
					case '6':
					case '7':
						/* do octal conversion: always takes three characters */
						outBuffer[outcount++] = toOctal(buffer[start+count],buffer[start+count+1],buffer[start+count+2]);
						count += 2;
						break;
					
					default:
						/* bad escape code */
						count++;
						finished = true;
				}
			}
			else if (buffer[start+count] == '\"')
			{
				/* the token is in quotes - so must ignore spaces within */
				quotesOn = !quotesOn;
			}else{
				/* not a escape char - and lose leading spaces */
				if (!(outcount == 0 && buffer[start+count] == ' '))
				{
					if ((!quotesOn && (buffer[start+count] == ' ' || buffer[start+count] == '\n')) || buffer[start+count] == '\0')
					{
						finished = true;
						outBuffer[outcount] = '\0';
					}else{
						outBuffer[outcount++] = buffer[start + count];
					}
				}
			}
		}else{
			/* dont convert escape chrs */
			if (!(outcount == 0 && buffer[start+count] == ' '))
			{
				if (buffer[start+count] == ' ' || buffer[start+count] == '\n' || buffer[start+count] == '\0')
				{
					finished = true;
				}else{
					/* copy the character and keep the output terminated */
					outBuffer[outcount++] = buffer[start + count];
					outBuffer[outcount] = '\0';
				}
			}
		}
	}

	/* report where scanning stopped, as an offset into buffer */
	*nextChar = (start+count);
	
	return outcount;
}
Пример #16
0
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::string
//   '|"
//   [ U+XXXX | \uXXXX | \" | \' | \\ | \/ | \b | \f | \n | \r | \t | . ]
//   '|"
//
//   Matches a string delimited by 'quote' at the cursor. Inside the string,
//   U+XXXX and \uXXXX (four hex digits) are replaced by the UTF-8 encoding of
//   that codepoint, recognised backslash escapes are replaced by the
//   character they denote, and any other escaped character stands for itself.
//
//   On success 'token' holds the opening quote, the decoded content and the
//   closing quote, 'type' becomes Lexer::Type::string and the cursor moves
//   past the closing quote. If the opening quote is present but the string
//   is never closed, false is returned and 'token' is left holding the
//   partial result.
bool Lexer::isString (std::string& token, Lexer::Type& type, int quote)
{
  std::size_t pos = _cursor;

  // Must open with the requested quote character.
  if (_text[pos] != quote)
    return false;

  token = _text.substr (pos++, 1);

  for (int c = _text[pos]; c != 0; c = _text[pos])
  {
    // Closing quote reached.
    if (c == quote)
      break;

    // Unicode U+XXXX or \uXXXX codepoint.
    if (_eos - pos >= 6 &&
        ((_text[pos + 0] == 'U' && _text[pos + 1] == '+') ||
         (_text[pos + 0] == '\\' && _text[pos + 1] == 'u')) &&
        isHexDigit (_text[pos + 2]) &&
        isHexDigit (_text[pos + 3]) &&
        isHexDigit (_text[pos + 4]) &&
        isHexDigit (_text[pos + 5]))
    {
      token += utf8_character (
                 hexToInt (
                   _text[pos + 2],
                   _text[pos + 3],
                   _text[pos + 4],
                   _text[pos + 5]));
      pos += 6;
      continue;
    }

    // An escaped thing: look at the character after the backslash.
    if (c == '\\')
    {
      c = _text[++pos];

      char decoded;
      switch (c)
      {
      case '"':  decoded = (char) 0x22; break;
      case '\'': decoded = (char) 0x27; break;
      case '\\': decoded = (char) 0x5C; break;
      case 'b':  decoded = (char) 0x08; break;
      case 'f':  decoded = (char) 0x0C; break;
      case 'n':  decoded = (char) 0x0A; break;
      case 'r':  decoded = (char) 0x0D; break;
      case 't':  decoded = (char) 0x09; break;
      case 'v':  decoded = (char) 0x0B; break;

      // Anything else may be escaped harmlessly and stands for itself. In
      // particular this covers 'quote' when it is not listed above.
      default:   decoded = (char) c;    break;
      }

      token += decoded;
      ++pos;
      continue;
    }

    // Ordinary character; utf8_next_char is expected to advance 'pos'.
    token += utf8_character (utf8_next_char (_text, pos));
  }

  // Without a closing quote this is not a string.
  if (_text[pos] != quote)
    return false;

  token += _text.substr (pos++, 1);
  type = Lexer::Type::string;
  _cursor = pos;
  return true;
}
Пример #17
0
// Tries to consume an HTML character reference whose '&' is the next
// character of the input.
//
// outEntityConsumed	receives the raw text that was read for the reference
//						(it may hold a partial reference when false is returned).
// outCharacterToAdd	receives the decoded character; only set on success.
//
// Returns true when a complete reference ending in ';' was consumed.
// Otherwise returns false after un-reading every character this call
// consumed, so the caller can treat the '&' as ordinary text.
bool HTMLLexer::ConsumePossibleEntity( VString &outEntityConsumed, UniChar &outCharacterToAdd )
{
	if (!fLexerInput->HasMoreChars())	return false;
	xbox_assert( fLexerInput->PeekAtNextChar() == CHAR_AMPERSAND );

	// In HTML, escape sequences come in one of three forms:
	//	&#DDDD; (where D is decimal number)
	//	&#xHHHH; (where H is hexadecimal number -- case insensitive)
	//	&entity; (where entity is a common entity reference)
	// If the input matches none of the three, it is assumed to be regular
	// text that merely happens to contain an ampersand.

	// Consume the current character to see what sort of escape sequence we're dealing with
	fLexerInput->MoveToNextChar();	// Eat the &
	if (!fLexerInput->HasMoreChars()) {
		fLexerInput->MoveToPreviousChar();
		return false;
	}
	UniChar escapeType = fLexerInput->MoveToNextChar();
	sLONG charactersToRevert = 2;	// the '&' and escapeType
	outEntityConsumed = "&";
	outEntityConsumed.AppendUniChar( escapeType );
	switch (escapeType) {
		case CHAR_NUMBER_SIGN: {
			// Either a hex literal ("#x" / "#X") or a decimal literal follows the # sign.
			if (!fLexerInput->HasMoreChars())	break;
			UniChar literalType = fLexerInput->MoveToNextChar();
			outEntityConsumed.AppendUniChar( literalType );
			charactersToRevert++;

			const bool isHex = (literalType == CHAR_LATIN_CAPITAL_LETTER_X) ||
							   (literalType == CHAR_LATIN_SMALL_LETTER_X);

			// Hex literals are capped at four digits and decimal literals at
			// five. For a decimal literal, literalType already is the first
			// digit, so it seeds the value instead of being read a second time.
			unsigned long value = 0;
			sLONG digitCount = 0;
			sLONG maxDigits = 0;
			if (isHex) {
				maxDigits = 4;
			} else if (isDecimalDigit( literalType )) {
				value = literalType - '0';
				digitCount = 1;
				maxDigits = 5;
			} else {
				// Neither an 'x' nor a digit: not a numeric reference.
				break;
			}

			// Peek before consuming so that the character ending the digit
			// run (normally the ';') stays in the input for the check below.
			while (digitCount < maxDigits && fLexerInput->HasMoreChars()) {
				UniChar next = fLexerInput->PeekAtNextChar();
				if (isHex ? !isHexDigit( next ) : !isDecimalDigit( next ))	break;

				fLexerInput->MoveToNextChar();
				charactersToRevert++;

				// Digits arrive most significant first, so shift what has
				// been accumulated and add the new digit at the low end.
				if (isHex)	value = value * 16 + getHexValueFromCharacter( next );
				else		value = value * 10 + (next - '0');

				outEntityConsumed.AppendUniChar( next );
				digitCount++;
			}

			// "&#x;" has no digits and is not a legal reference.
			if (digitCount == 0)	break;

			// Five decimal digits can exceed 16 bits; such a value cannot be
			// handed back as the 16-bit quantity this function produces.
			if (value > 0xFFFF)	break;

			// The last character needs to be a semi-colon, or else it's not a legal entity
			if (!fLexerInput->HasMoreChars())	break;
			if (fLexerInput->PeekAtNextChar() != CHAR_SEMICOLON)	break;
			fLexerInput->MoveToNextChar();	// Eat the ;

			outEntityConsumed.AppendUniChar( ';' );

			// Add the value as a UniChar to our stream
			outCharacterToAdd = (UniChar)value;
			return true;
		}
		default: {
			// We found an ampersand and know it's not a numeric literal.  Now we need to see whether it's an
			// entity made to be a bit more human readable.  We're going to grab a bunch of characters from the
			// stream without actually shifting them in.  We cut them off at the semi-colon because all entities
			// must end with one to be legal.  Then, if it matches a known entity, we're set -- otherwise it's just
			// a stream of random characters.  We need a whopping nine character to be able to support &thetasym;
			VString entityName;
			fLexerInput->GetSubString( fLexerInput->GetCurrentPosition(), 9, entityName );

			// Now, let's see where the semi-colon is, and strip it off (as well as everything past it).  If we don't
			// find a semi-colon, then we know we don't have an entity
			VectorOfVString subStrs;
			if (!entityName.GetSubStrings( CHAR_SEMICOLON, subStrs ))	break;

			// The first sub string is the entity name we care about
			if (GetEntityValue( subStrs.front(), outCharacterToAdd )) {
				// We want to eat up as many characters as the entity was consuming
				VIndex size = subStrs.front().GetLength();
				for (VIndex i = 0; i < size; i++) {
					outEntityConsumed.AppendUniChar( fLexerInput->MoveToNextChar() );
				}
				return true;
			}
		} break;
	}

	// Not a reference after all: put back every character consumed above
	for (sLONG i = 0; i < charactersToRevert; i++)	fLexerInput->MoveToPreviousChar();

	return false;
}
Пример #18
0
//
//  Creates an input stream for this URL.
//
//  For a file URL whose host is absent or compares equal to
//  XMLUni::fgLocalHostString (compareIStringASCII returns 0), the path is
//  %-decoded and opened with BinFileInputStream; 0 is returned if that
//  file cannot be opened. Every other URL is handed to the installed net
//  accessor.
//
//  Throws MalformedURLException when the path contains a '%' that is not
//  followed by two hex digits, or when a net accessor is needed but none is
//  installed.
//
BinInputStream* XMLURL::makeNewStream() const
{
    //
    //  If its a local host, then we short circuit it and use our own file
    //  stream support. Otherwise, we just let it fall through and let the
    //  installed network access object provide a stream.
    //
    if (fProtocol == XMLURL::File)
    {
        if (!fHost || !XMLString::compareIStringASCII(fHost, XMLUni::fgLocalHostString))
        {

            // Work on a private copy of the path; the janitor releases it.
            XMLCh* realPath = XMLString::replicate(fPath, fMemoryManager);
            ArrayJanitor<XMLCh> basePathName(realPath, fMemoryManager);

            //
            // Need to manually replace any character reference %xx first
            // HTTP protocol will be done automatically by the netaccessor
            //
            int end = XMLString::stringLen(realPath);
            int percentIndex = XMLString::indexOf(realPath, chPercent, 0, fMemoryManager);

            while (percentIndex != -1) {

                // A '%' in the last or second to last position cannot be
                // followed by two hex digits.
                const bool truncatedEscape = (percentIndex+2 >= end);

                if (truncatedEscape ||
                    !isHexDigit(realPath[percentIndex+1]) ||
                    !isHexDigit(realPath[percentIndex+2]))
                {
                    // Report the offending escape, copying only characters
                    // that exist so a truncated escape at the end of the
                    // path is not read past the end of the buffer.
                    XMLCh value1[4];
                    const int escapeLen = truncatedEscape ? (end - percentIndex) : 3;
                    XMLString::moveChars(value1, &(realPath[percentIndex]), escapeLen);
                    value1[escapeLen] = chNull;
                    ThrowXMLwithMemMgr2(MalformedURLException
                            , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
                            , realPath
                            , value1
                            , fMemoryManager);
                }

                unsigned int value = (xlatHexDigit(realPath[percentIndex+1]) * 16) + xlatHexDigit(realPath[percentIndex+2]);

                // Overwrite the '%' with the decoded character ...
                realPath[percentIndex] = XMLCh(value);

                // ... and close the two-character gap left by the digits.
                int i =0;
                for (i = percentIndex + 1; i < end - 2 ; i++)
                    realPath[i] = realPath[i+2];
                realPath[i] = chNull;
                end = i;

                // Continue after the decoded character. Searching from
                // percentIndex itself would find a '%' produced by "%25"
                // and wrongly decode it a second time.
                if (percentIndex + 1 < end)
                    percentIndex = XMLString::indexOf(realPath, chPercent, percentIndex + 1, fMemoryManager);
                else
                    percentIndex = -1;
            }


            BinFileInputStream* retStrm = new (fMemoryManager) BinFileInputStream(realPath, fMemoryManager);
            if (!retStrm->getIsOpen())
            {
                delete retStrm;
                return 0;
            }
            return retStrm;
        }
    }

    //
    //  If we don't have have an installed net accessor object, then we
    //  have to just throw here.
    //
    if (!XMLPlatformUtils::fgNetAccessor)
        ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager);

    // Else ask the net accessor to create the stream
    return XMLPlatformUtils::fgNetAccessor->makeNew(*this);
}
Пример #19
0
// Scans the next token from the input and returns its grammar code.
//
// The scanner is a character-driven state machine: each loop iteration looks
// at `current` (with `next1`..`next3` as lookahead), possibly records the
// character, possibly switches `state`, and then advances by one character
// unless a token has been completed via setDone().
//
// Returns:
//   0                         at end of input (state Eof),
//   -1                        for a malformed token (state Bad; err/errmsg set),
//   a QScriptGrammar::T_* id  otherwise.
// For numeric literals the value is stored in qsyylval.dval. For identifiers
// and string literals qsyylval.ustr is set to 0 (interning is not implemented).
int QScript::Lexer::lex()
{
    int token = 0;
    state = Start;
    ushort stringType = 0; // either single or double quotes
    pos8 = pos16 = 0;
    done = false;
    terminator = false;

    // did we push a token on the stack previously ?
    // (after an automatic semicolon insertion)
    if (stackToken >= 0) {
        setDone(Other);
        token = stackToken;
        stackToken = -1;
    }

    while (!done) {
        switch (state) {
        case Start:
            if (isWhiteSpace()) {
                // do nothing
            } else if (current == '/' && next1 == '/') {
                recordStartPos();
                shift(1);
                state = InSingleLineComment;
            } else if (current == '/' && next1 == '*') {
                recordStartPos();
                shift(1);
                state = InMultiLineComment;
            } else if (current == 0) {
                // End of input.
                syncProhibitAutomaticSemicolon();
                if (!terminator && !delimited && !prohibitAutomaticSemicolon) {
                    // automatic semicolon insertion if program incomplete
                    token = QScriptGrammar::T_SEMICOLON;
                    // Stack token 0 so that the next call returns 0 right
                    // after this synthetic semicolon.
                    stackToken = 0;
                    setDone(Other);
                } else {
                    setDone(Eof);
                }
            } else if (isLineTerminator()) {
                shiftWindowsLineBreak();
                yylineno++;
                yycolumn = 0;
                bol = true;
                terminator = true;
                syncProhibitAutomaticSemicolon();
                // A line break directly after continue/break/return/throw
                // (see restrKeyword below) yields a semicolon token.
                if (restrKeyword) {
                    token = QScriptGrammar::T_SEMICOLON;
                    setDone(Other);
                }
            } else if (current == '"' || current == '\'') {
                recordStartPos();
                state = InString;
                // Remember which quote opened the string so only the same
                // quote closes it.
                stringType = current;
            } else if (isIdentLetter(current)) {
                recordStartPos();
                record16(current);
                state = InIdentifier;
            } else if (current == '0') {
                // A leading zero may start a hex, octal or decimal literal.
                recordStartPos();
                record8(current);
                state = InNum0;
            } else if (isDecimalDigit(current)) {
                recordStartPos();
                record8(current);
                state = InNum;
            } else if (current == '.' && isDecimalDigit(next1)) {
                recordStartPos();
                record8(current);
                state = InDecimal;
            } else {
                recordStartPos();
                token = matchPunctuator(current, next1, next2, next3);
                if (token != -1) {
                    if (terminator && !delimited && !prohibitAutomaticSemicolon
                        && (token == QScriptGrammar::T_PLUS_PLUS
                            || token == QScriptGrammar::T_MINUS_MINUS)) {
                        // automatic semicolon insertion
                        // The ++/-- token is stacked and returned by the
                        // next call; this call returns the semicolon.
                        stackToken = token;
                        token = QScriptGrammar::T_SEMICOLON;
                    }
                    setDone(Other);
                }
                else {
                    setDone(Bad);
                    err = IllegalCharacter;
                    errmsg = QLatin1String("Illegal character");
                }
            }
            break;
        case InString:
            if (current == stringType) {
                // Closing quote: step past it and finish the literal.
                shift(1);
                setDone(String);
            } else if (current == 0 || isLineTerminator()) {
                setDone(Bad);
                err = UnclosedStringLiteral;
                errmsg = QLatin1String("Unclosed string at end of line");
            } else if (current == '\\') {
                state = InEscapeSequence;
            } else {
                record16(current);
            }
            break;
            // Escape Sequences inside of strings
        case InEscapeSequence:
            if (isOctalDigit(current)) {
                // Octal escapes: three digits (first one limited to 0-3),
                // then two digits, then a single digit.
                if (current >= '0' && current <= '3' &&
                     isOctalDigit(next1) && isOctalDigit(next2)) {
                    record16(convertOctal(current, next1, next2));
                    shift(2);
                    state = InString;
                } else if (isOctalDigit(current) &&
                            isOctalDigit(next1)) {
                    record16(convertOctal('0', current, next1));
                    shift(1);
                    state = InString;
                } else if (isOctalDigit(current)) {
                    record16(convertOctal('0', '0', current));
                    state = InString;
                } else {
                    // NOTE(review): this branch looks unreachable, since the
                    // enclosing condition already guarantees
                    // isOctalDigit(current).
                    setDone(Bad);
                    err = IllegalEscapeSequence;
                    errmsg = QLatin1String("Illegal escape squence");
                }
            } else if (current == 'x')
                state = InHexEscape;
            else if (current == 'u')
                state = InUnicodeEscape;
            else {
                if (isLineTerminator()) {
                    // Backslash followed by a line break: nothing is
                    // recorded, only the line bookkeeping is updated.
                    shiftWindowsLineBreak();
                    yylineno++;
                    yycolumn = 0;
                    bol = true;
                } else {
                    record16(singleEscape(current));
                }
                state = InString;
            }
            break;
        case InHexEscape:
            if (isHexDigit(current) && isHexDigit(next1)) {
                state = InString;
                record16(QLatin1Char(convertHex(current, next1)));
                shift(1);
            } else if (current == stringType) {
                // "\x" directly followed by the closing quote: keep a
                // literal 'x' and finish the string.
                record16(QLatin1Char('x'));
                shift(1);
                setDone(String);
            } else {
                // Not a valid \xHH escape: keep the 'x' and the current
                // character literally.
                record16(QLatin1Char('x'));
                record16(current);
                state = InString;
            }
            break;
        case InUnicodeEscape:
            if (isHexDigit(current) && isHexDigit(next1) &&
                 isHexDigit(next2) && isHexDigit(next3)) {
                record16(convertUnicode(current, next1, next2, next3));
                shift(3);
                state = InString;
            } else if (current == stringType) {
                // "\u" directly followed by the closing quote: keep a
                // literal 'u' and finish the string.
                record16(QLatin1Char('u'));
                shift(1);
                setDone(String);
            } else {
                setDone(Bad);
                err = IllegalUnicodeEscapeSequence;
                errmsg = QLatin1String("Illegal unicode escape sequence");
            }
            break;
        case InSingleLineComment:
            if (isLineTerminator()) {
                shiftWindowsLineBreak();
                yylineno++;
                yycolumn = 0;
                terminator = true;
                bol = true;
                if (restrKeyword) {
                    token = QScriptGrammar::T_SEMICOLON;
                    setDone(Other);
                } else
                    state = Start;
            } else if (current == 0) {
                setDone(Eof);
            }
            break;
        case InMultiLineComment:
            if (current == 0) {
                setDone(Bad);
                err = UnclosedComment;
                errmsg = QLatin1String("Unclosed comment at end of file");
            } else if (isLineTerminator()) {
                shiftWindowsLineBreak();
                yylineno++;
            } else if (current == '*' && next1 == '/') {
                state = Start;
                shift(1);
            }
            break;
        case InIdentifier:
            if (isIdentLetter(current) || isDecimalDigit(current)) {
                record16(current);
                break;
            }
            setDone(Identifier);
            break;
        case InNum0:
            if (current == 'x' || current == 'X') {
                record8(current);
                state = InHex;
            } else if (current == '.') {
                record8(current);
                state = InDecimal;
            } else if (current == 'e' || current == 'E') {
                record8(current);
                state = InExponentIndicator;
            } else if (isOctalDigit(current)) {
                record8(current);
                state = InOctal;
            } else if (isDecimalDigit(current)) {
                // '0' followed by 8 or 9: continue as a decimal literal.
                record8(current);
                state = InDecimal;
            } else {
                setDone(Number);
            }
            break;
        case InHex:
            if (isHexDigit(current))
                record8(current);
            else
                setDone(Hex);
            break;
        case InOctal:
            if (isOctalDigit(current)) {
                record8(current);
            } else if (isDecimalDigit(current)) {
                // An 8 or 9 inside an octal-looking literal switches to
                // decimal scanning.
                record8(current);
                state = InDecimal;
            } else {
                setDone(Octal);
            }
            break;
        case InNum:
            if (isDecimalDigit(current)) {
                record8(current);
            } else if (current == '.') {
                record8(current);
                state = InDecimal;
            } else if (current == 'e' || current == 'E') {
                record8(current);
                state = InExponentIndicator;
            } else {
                setDone(Number);
            }
            break;
        case InDecimal:
            if (isDecimalDigit(current)) {
                record8(current);
            } else if (current == 'e' || current == 'E') {
                record8(current);
                state = InExponentIndicator;
            } else {
                setDone(Number);
            }
            break;
        case InExponentIndicator:
            // NOTE(review): the state is unchanged after a sign, so more
            // than one consecutive '+'/'-' is accepted here.
            if (current == '+' || current == '-') {
                record8(current);
            } else if (isDecimalDigit(current)) {
                record8(current);
                state = InExponent;
            } else {
                setDone(Bad);
                err = IllegalExponentIndicator;
                errmsg = QLatin1String("Illegal syntax for exponential number");
            }
            break;
        case InExponent:
            if (isDecimalDigit(current)) {
                record8(current);
            } else {
                setDone(Number);
            }
            break;
        default:
            Q_ASSERT_X(0, "Lexer::lex", "Unhandled state in switch statement");
        }

        // move on to the next character
        if (!done)
            shift(1);
        if (state != Start && state != InSingleLineComment)
            bol = false;
    }

    // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
    if ((state == Number || state == Octal || state == Hex)
         && isIdentLetter(current)) {
        state = Bad;
        err = IllegalIdentifier;
        errmsg = QLatin1String("Identifier cannot start with numeric literal");
    }

    // terminate string
    buffer8[pos8] = '\0';

    // Convert the recorded digits; hex and octal literals are folded into the
    // generic Number state once their value is known.
    double dval = 0;
    if (state == Number) {
        dval = qstrtod(buffer8, 0, 0);
    } else if (state == Hex) { // scan hex numbers
        dval = QScript::integerFromString(buffer8, pos8, 16);
        state = Number;
    } else if (state == Octal) {   // scan octal number
        dval = QScript::integerFromString(buffer8, pos8, 8);
        state = Number;
    }

    // Reset the per-token flags; they are set again below where applicable.
    restrKeyword = false;
    delimited = false;

    // Track the parenthesised header that follows if/for/while/with (see the
    // Identifier case below, where counting is started).
    switch (parenthesesState) {
    case IgnoreParentheses:
        break;
    case CountParentheses:
        if (token == QScriptGrammar::T_RPAREN) {
            --parenthesesCount;
            if (parenthesesCount == 0)
                parenthesesState = BalancedParentheses;
        } else if (token == QScriptGrammar::T_LPAREN) {
            ++parenthesesCount;
        }
        break;
    case BalancedParentheses:
        parenthesesState = IgnoreParentheses;
        break;
    }

    switch (state) {
    case Eof:
        return 0;
    case Other:
        // '}' and ';' mark the statement as delimited, which suppresses
        // automatic semicolon insertion on the next call.
        if(token == QScriptGrammar::T_RBRACE || token == QScriptGrammar::T_SEMICOLON)
            delimited = true;
        return token;
    case Identifier:
        // A negative result means the text is not a reserved word.
        if ((token = findReservedWord(buffer16, pos16)) < 0) {
            /* TODO: close leak on parse error. same holds true for String */
            if (driver) {
                Q_ASSERT_X(false, Q_FUNC_INFO, "not implemented");
                qsyylval.ustr = 0; // driver->intern(buffer16, pos16);
            } else
                qsyylval.ustr = 0;
            return QScriptGrammar::T_IDENTIFIER;
        }
        if (token == QScriptGrammar::T_CONTINUE || token == QScriptGrammar::T_BREAK
            || token == QScriptGrammar::T_RETURN || token == QScriptGrammar::T_THROW) {
            // Restricted keywords: a following line break ends the statement.
            restrKeyword = true;
        } else if (token == QScriptGrammar::T_IF || token == QScriptGrammar::T_FOR
                   || token == QScriptGrammar::T_WHILE || token == QScriptGrammar::T_WITH) {
            parenthesesState = CountParentheses;
            parenthesesCount = 0;
        } else if (token == QScriptGrammar::T_DO) {
            parenthesesState = BalancedParentheses;
        }
        return token;
    case String:
        if (driver) {
            Q_ASSERT_X(false, Q_FUNC_INFO, "not implemented");
            qsyylval.ustr = 0; // driver->intern(buffer16, pos16);
        } else
            qsyylval.ustr = 0;
        return QScriptGrammar::T_STRING_LITERAL;
    case Number:
        qsyylval.dval = dval;
        return QScriptGrammar::T_NUMERIC_LITERAL;
    case Bad:
        return -1;
    default:
        Q_ASSERT(!"unhandled numeration value in switch");
        return -1;
    }
}
Пример #20
0
/*
 * Minimal sscanf-style parser over a NUL-terminated buffer.
 *
 * The format string is consumed one character at a time:
 *   ' '     skips a (possibly empty) run of whitespace in buf
 *   %%      matches a literal '%'
 *   %c      stores the next character of buf
 *   %s      stores a run of non-whitespace characters, NUL-terminated
 *   %N      stores a run of characters accepted by isNameChar(), NUL-terminated
 *   %i      optionally signed decimal integer
 *   %b      optionally signed binary integer, prefixed by 'b' or 'B'
 *   %h      optionally signed hexadecimal integer, prefixed by "0x"
 *   %f      optionally signed decimal float (no exponent part)
 *   ''      matches a literal single quote
 *   'n      matches a newline
 *   other   must match the next character of buf exactly
 *
 * %s and %N write without any length limit; the caller must supply a
 * destination large enough for the longest possible match.
 *
 * Returns the number of format elements (literal characters and conversions
 * alike) that matched before the first failure or the end of the format.
 */
int vfsscanf_(const char *buf, const char *format, va_list arg){
   int buffer_pos = 0;
   int format_pos = 0;
   int string_pos = 0;
   int ret = 0;
   int found = 0;

   int *i;
   float *f;
   char *c;
   int invert;
   float tf;

   int state = 0;  // 0 = last char = normal char
   // 1 = last char = '%'
   // 2 = last char = '\'' (single quote, the escape introducer)

   while(format[format_pos]){
      found = 0;
      switch(state){
         case 0:
            switch(format[format_pos]){
               case '%':
                  state = 1;
                  format_pos++;
                  continue;

               case '\'':
                  state = 2;
                  format_pos++;
                  continue;

               case ' ': // skip whitespace, never running past the terminator
                  while(buf[buffer_pos] != '\0' && isWhitespace(buf[buffer_pos])){
                     buffer_pos++;
                     found++;
                  }
                  found++; // an empty run of whitespace still counts as a match
                  break;

               default:
                  if(format[format_pos] != buf[buffer_pos++]){
                     return(ret);
                  }
                  found++;
            }
            break;

         case 1:
            switch(format[format_pos]){
               case '%':
                  if(buf[buffer_pos++] != '%'){
                     return(ret);
                  }
                  found++;
                  break;

               case 'c':
                  c = va_arg(arg, char *);
                  *c = buf[buffer_pos++];
                  found++;
                  break;

               case 's':
                  string_pos = 0;
                  c = va_arg(arg, char *);
                  // Stop at the terminator as well as at whitespace, so the
                  // scan cannot run off the end of buf.
                  while(buf[buffer_pos] != '\0' && !isWhitespace(buf[buffer_pos])){
                     c[string_pos] = buf[buffer_pos++];
                     string_pos++;
                     c[string_pos] = '\0';
                     found++;
                  }
                  break;

               case 'N':
                  string_pos = 0;
                  c = va_arg(arg, char *);
                  while(buf[buffer_pos] != '\0' && isNameChar(buf[buffer_pos])){
                     c[string_pos] = buf[buffer_pos++];
                     string_pos++;
                     c[string_pos] = '\0';
                     found++;
                  }
                  break;

               case 'i':
                  i = va_arg(arg, int *);
                  *i = 0;
                  invert = 1;
                  if(buf[buffer_pos] == '-'){
                     invert = -1;
                     buffer_pos++;
                  }
                  else if(buf[buffer_pos] == '+'){
                     buffer_pos++;
                  }
                  while(isDecDigit(buf[buffer_pos])){
                     *i *= 10;
                     *i += buf[buffer_pos++] - '0';
                     found++;
                  }
                  *i *= invert;
                  break;

               case 'b':
                  i = va_arg(arg, int *);
                  *i = 0;
                  invert = 1;
                  if(buf[buffer_pos] == '-'){
                     invert = -1;
                     buffer_pos++;
                  }
                  else if(buf[buffer_pos] == '+'){
                     buffer_pos++;
                  }
                  if(!(buf[buffer_pos] == 'b' || buf[buffer_pos] == 'B')){
                     return(ret);
                  }
                  buffer_pos++;
                  while(isBinDigit(buf[buffer_pos])){
                     *i *= 2;
                     *i += buf[buffer_pos++] - '0';
                     found++;
                  }
                  *i *= invert;
                  break;

               case 'h':
                  i = va_arg(arg, int *);
                  *i = 0;
                  invert = 1;
                  if(buf[buffer_pos] == '-'){
                     invert = -1;
                     buffer_pos++;
                  }
                  else if(buf[buffer_pos] == '+'){
                     buffer_pos++;
                  }
                  if(buf[buffer_pos++] != '0'){
                     return(ret);
                  }
                  if(buf[buffer_pos++] != 'x'){
                     return(ret);
                  }
                  while(isHexDigit(buf[buffer_pos])){
                     *i *= 16;
                     // In ASCII the digits sort before 'A', which sorts before 'a'.
                     if(buf[buffer_pos] < 'A'){
                        *i += buf[buffer_pos++] - '0';
                     }
                     else if(buf[buffer_pos] < 'a'){
                        *i += buf[buffer_pos++] - 'A' + 10;
                     }
                     else{
                        *i += buf[buffer_pos++] - 'a' + 10;
                     }
                     found++;
                  }
                  *i *= invert;
                  break;

               case 'f':
                  f = va_arg(arg, float *);
                  *f = 0;
                  tf = 10; // divisor for the next fractional digit
                  invert = 1;
                  if(buf[buffer_pos] == '-'){
                     invert = -1;
                     buffer_pos++;
                  }
                  else if(buf[buffer_pos] == '+'){
                     buffer_pos++;
                  }
                  while(isDecDigit(buf[buffer_pos])){
                     *f *= 10;
                     *f += buf[buffer_pos++] - '0';
                     found++;
                  }
                  if(buf[buffer_pos] == '.'){
                     buffer_pos++;
                     while(isDecDigit(buf[buffer_pos])){
                        *f += (buf[buffer_pos++] - '0') / tf;
                        tf *= 10;
                        found++;
                     }
                  }
                  *f *= invert;
                  break;

               default:
                  // Unknown conversion specifier.
                  return(ret);
            }
            state = 0;
            break;

         case 2:
            switch(format[format_pos]){
               case '\'':
                  if(buf[buffer_pos++] != '\''){
                     return(ret);
                  }
                  found++;
                  break;

               case 'n':
                  if(buf[buffer_pos++] != '\n'){
                     return(ret);
                  }
                  found++;
                  break;

               default:
                  // Unknown escape.
                  return(ret);
            }
            state = 0;
            break;

         default:
            break;
      }

      format_pos++;
      // A conversion that consumed nothing is a failed match.
      if(!found){
         return(ret);
      }
      ret++;
   }
   return(ret);
}
Пример #21
0
////////////////////////////////////////////////////////////////////////////////
// Full implementation of an unquoted word.  Includes:
//   one\ two
//   abcU+0020def
//   abc\u0020def
//   a\tb
//
// Ends at:
//   Lexer::isEOS
//   Lexer::isWhitespace
//   Lexer::isHardBoundary
//
// On return 'word' holds the decoded word and 'cursor' points at the first
// character that was not consumed.  Returns true if at least one character
// was read.  A backslash that is the very last character of 'text' has
// nothing to escape and is kept as a literal backslash.
bool Lexer::readWord (
  const std::string& text,
  std::string::size_type& cursor,
  std::string& word)
{
  std::string::size_type eos = text.length ();

  word = "";
  int c;
  int prev = 0;

  // The explicit bounds check keeps 'eos - cursor' below from wrapping around
  // and guarantees text[cursor] is never evaluated beyond the end of 'text'.
  while (cursor < eos && (c = text[cursor]))  // Handles EOS.
  {
    // Unquoted word ends on white space.
    if (Lexer::isWhitespace (c))
      break;

    // Parentheses mostly.
    if (prev && Lexer::isHardBoundary (prev, c))
      break;

    // Unicode U+XXXX or \uXXXX codepoint.
    if (eos - cursor >= 6 &&
        ((text[cursor + 0] == 'U'  && text[cursor + 1] == '+') ||
         (text[cursor + 0] == '\\' && text[cursor + 1] == 'u')) &&
        isHexDigit (text[cursor + 2]) &&
        isHexDigit (text[cursor + 3]) &&
        isHexDigit (text[cursor + 4]) &&
        isHexDigit (text[cursor + 5]))
    {
      word += utf8_character (
                hexToInt (
                  text[cursor + 2],
                  text[cursor + 3],
                  text[cursor + 4],
                  text[cursor + 5]));
      cursor += 6;
    }

    // An escaped thing.
    else if (c == '\\')
    {
      // Trailing backslash: there is no character left to escape.  Keep the
      // backslash itself and stop, instead of stepping past the end of text.
      if (cursor + 1 >= eos)
      {
        word += (char) 0x5C;
        ++cursor;
        break;
      }

      c = text[++cursor];

      switch (c)
      {
      case '"':  word += (char) 0x22; ++cursor; break;
      case '\'': word += (char) 0x27; ++cursor; break;
      case '\\': word += (char) 0x5C; ++cursor; break;
      case 'b':  word += (char) 0x08; ++cursor; break;
      case 'f':  word += (char) 0x0C; ++cursor; break;
      case 'n':  word += (char) 0x0A; ++cursor; break;
      case 'r':  word += (char) 0x0D; ++cursor; break;
      case 't':  word += (char) 0x09; ++cursor; break;
      case 'v':  word += (char) 0x0B; ++cursor; break;

      // This pass-through default case means that anything can be escaped
      // harmlessly. In particular 'quote' is included, if it not one of the
      // above characters.
      default:   word += (char) c;    ++cursor; break;
      }
    }

    // Ordinary character.
    else
      word += utf8_character (utf8_next_char (text, cursor));

    // Remembered for the hard-boundary test on the next character.
    prev = c;
  }

  return ! word.empty ();
}
Пример #22
0
/**
 * Scans the remainder of a hexadecimal literal. The caller has already
 * consumed the leading '0x' or '0X'; everything accepted here is appended
 * to ss.
 * @returns HEX_NUMERAL |
 *          HEX_NUMERAL_WITH_INT_TYPE_SUFFIX |
 *          HEXADECIMAL_FLOATING_POINT_LITERAL |
 *          ERROR
 */
LiteralToken LiteralSupport::getHexNumeral(u32string &ss) {
  // Where the literal began (two characters back, at the '0'); used only
  // when reporting a diagnostic.
  const int numeralStart = src->getCursor() - 2;

  // The first character must be a hex digit or a '.'.
  if (!isHexDigit(src->peekChar()) && src->peekChar() != '.') {
    diag->addErr(c4::ERR_NVAL_HEX, numeralStart, src->getCursor());
    return LiteralToken::ERROR;
  }

  // Literal of the form "0x.<digits>": the '.' must be followed by a digit.
  bool sawPoint = (src->peekChar() == '.');
  if (sawPoint) {
    if (!isHexDigit(src->peekChar(1))) {
      src->ungetChar(2);
      diag->addErr(c4::ERR_NVAL_HEX, numeralStart, src->getCursor());
      return LiteralToken::ERROR;
    }
    ss += src->getChar(); // the '.'
  }

  // Whole-part digits, or fractional digits when the literal began with '.'.
  consumeDigitsPOrUnderscores(ss, isHexDigit);

  if (!sawPoint) {
    // Optional '.' after the whole part.
    if (src->peekChar() == '.') {
      ss += src->getChar();
      sawPoint = true;
    }

    // Fractional digits, if any.
    consumeDigitsPOrUnderscores(ss, isHexDigit);
  }

  // Neither a '.' nor a binary exponent indicator: this is an integer,
  // optionally followed by an integer type suffix.
  if (!sawPoint && !isBinaryExponentIndicator(src->peekChar())) {
    char suffix = src->peekChar();
    if (!isIntegerTypeSuffix(suffix)) {
      return LiteralToken::HEX_NUMERAL;
    }

    ss += src->getChar(); // keep the suffix as part of the literal
    return LiteralToken::HEX_NUMERAL_WITH_INT_TYPE_SUFFIX;
  }

  // From here on the literal is floating point, for which the binary
  // exponent indicator is required.
  if (!isBinaryExponentIndicator(src->peekChar())) {
    diag->addErr(c4::ERR_NVAL_HEX, numeralStart, src->getCursor());
    return LiteralToken::ERROR;
  }

  // The 'p' / 'P' itself.
  ss += src->getChar();

  // Optional exponent sign.
  if (isSign(src->peekChar())) {
    ss += src->getChar();
  }

  // Exponent digits: at least one is required.
  int exponentDigits = consumeDigitsPOrUnderscores(ss, isDecimalDigit);
  if (exponentDigits <= 0) {
    diag->addErr(c4::ERR_NVAL_HEX, numeralStart, src->getCursor());
    return LiteralToken::ERROR;
  }

  // Optional float type suffix: 'f', 'F', 'd' or 'D'.
  if (isFloatTypeSuffix(src->peekChar())) {
    ss += src->getChar();
  }

  return LiteralToken::HEXADECIMAL_FLOATING_POINT_LITERAL;
}
Пример #23
0
// Splits the first token off the front of 'text'.
//
// The token is removed from 'text' and returned; an empty string is returned
// when 'text' is empty.  'color' is set according to the token category
// (identifier, keyword, number, string, comment or default).  It is left
// untouched for a run of whitespace / control characters.
//
// Whether the editor holds a Monkey file changes the rules:
//   * keywords are only recognised in Monkey files;
//   * '%' and '$' introduce binary and hex literals only in Monkey files;
//   * a leading '0' starts a hex ("0x...") or octal literal only in
//     non-Monkey files;
//   * in Monkey files ' starts a comment that runs to the end of the line,
//     otherwise it delimits a string, and "//" starts a line comment.
QString Highlighter::parseToke( QString &text,QColor &color ){
    if( !text.length() ) return "";

    int i=0,n=text.length();
    QChar c=text[i++];

    bool monkeyFile=_editor->isMonkey();

    if( c<=' ' ){
        while( i<n && text[i]<=' ' ) ++i;
    }else if( isAlpha(c) ){
        while( i<n && isIdent(text[i]) ) ++i;
        color=_identifiersColor;
        if( monkeyFile && _keyWords.contains( text.left(i).toLower()  ) ) color=_keywordsColor;
    }else if( c=='0' && !monkeyFile ){
        if( i<n && text[i]=='x' ){
            for( ++i;i<n && isHexDigit( text[i] );++i ){}
        }else{
            for( ;i<n && isOctDigit( text[i] );++i ){}
        }
        color=_numbersColor;
    }else if( isDigit(c) || (c=='.' && i<n && isDigit(text[i])) ){
        // A literal that began with '.' has no integer part and must not
        // take a second '.'.
        const bool leadingDot=(c=='.');
        while( i<n && isDigit(text[i]) ) ++i;
        if( !leadingDot && i<n && text[i]=='.' ){
            ++i;
            while( i<n && isDigit(text[i]) ) ++i;
        }
        if( i<n && (text[i]=='e' || text[i]=='E') ){
            // Look ahead before committing: the 'e' belongs to the number
            // only when an optionally signed digit follows, so that e.g.
            // "3em" still ends the number at '3'.
            int j=i+1;
            if( j<n && (text[j]=='+' || text[j]=='-') ) ++j;
            if( j<n && isDigit(text[j]) ){
                i=j;
                while( i<n && isDigit(text[i]) ) ++i;
            }
        }
        color=_numbersColor;
    }else if( c=='%' && monkeyFile && i<n && isBinDigit( text[i] ) ){
        for( ++i;i<n && isBinDigit( text[i] );++i ){}
        color=_numbersColor;
    }else if( c=='$' && monkeyFile && i<n && isHexDigit( text[i] ) ){
        for( ++i;i<n && isHexDigit( text[i] );++i ){}
        color=_numbersColor;
    }else if( c=='\"' ){
        if( monkeyFile ){
            // No backslash escapes in Monkey strings.
            for( ;i<n && text[i]!='\"';++i ){}
        }else{
            for( ;i<n && text[i]!='\"';++i ){
                // Skip an escaped quote so it does not end the string.
                if( text[i]=='\\' && i+1<n && text[i+1]=='\"' ) ++i;
            }
        }
        if( i<n ) ++i; // include the closing quote when present
        color=_stringsColor;
    }else if( !monkeyFile && c=='/' && i<n && text[i]=='/' ){
        for( ++i;i<n && text[i]!='\n';++i ){}
        if( i<n ) ++i; // include the newline when present
        color=_commentsColor;
    }else if( c=='\'' ){
        if( monkeyFile ){
            for( ;i<n && text[i]!='\n';++i ){}
            if( i<n ) ++i;
            color=_commentsColor;
        }else{
            for( ;i<n && text[i]!='\'';++i ){
                if( text[i]=='\\' && i+1<n && text[i+1]=='\'' ) ++i;
            }
            if( i<n ) ++i;
            color=_stringsColor;
        }
    }else{
        color=_defaultColor;
    }
    QString t=text.left(i);
    text=text.mid(i);
    return t;
}
// Tries to consume one HTML character reference from the front of `source`
// (the caller is expected to have already consumed the leading '&' --
// TODO confirm against callers) and appends its decoded value to
// `decodedEntity`.
//
// Parameters:
//   source                     - input; characters are consumed as they are
//                                recognised and given back via
//                                unconsumeCharacters()/push() on failure.
//   decodedEntity              - output; must be empty on entry.
//   notEnoughCharacters        - output; must be false on entry. Set to true
//                                when the input ran out before a decision
//                                could be made.
//   additionalAllowedCharacter - 0, or one of '"', '\'' or '>'. If the first
//                                character equals it, nothing is consumed.
//
// Returns false when nothing was decoded. Otherwise returns whatever
// convertToUTF16() returns for the decoded value.
//
// Inside the state machine, `continue` re-dispatches the current character
// in the new state without consuming it, while `break` falls through to the
// bottom of the loop where the character is recorded and consumed.
bool consumeHTMLEntity(SegmentedString& source, Vector<UChar, 16>& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter)
{
    ASSERT(!additionalAllowedCharacter || additionalAllowedCharacter == '"' || additionalAllowedCharacter == '\'' || additionalAllowedCharacter == '>');
    ASSERT(!notEnoughCharacters);
    ASSERT(decodedEntity.isEmpty());

    enum EntityState {
        Initial,
        Number,
        MaybeHexLowerCaseX,
        MaybeHexUpperCaseX,
        Hex,
        Decimal,
        Named
    };
    EntityState entityState = Initial;
    // Accumulated value of a numeric reference.
    // NOTE(review): no overflow guard is visible here for very long digit
    // runs -- confirm whether callers or legalEntityFor() cope with that.
    UChar32 result = 0;
    // Everything consumed so far, kept so it can be given back to `source`.
    Vector<UChar, 10> consumedCharacters;

    while (!source.isEmpty()) {
        UChar cc = *source;
        switch (entityState) {
        case Initial: {
            // Tab, LF, FF, space, '<' and '&' cannot start a reference.
            if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '<' || cc == '&')
                return false;
            if (additionalAllowedCharacter && cc == additionalAllowedCharacter)
                return false;
            if (cc == '#') {
                entityState = Number;
                break;
            }
            if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) {
                entityState = Named;
                continue;
            }
            return false;
        }
        case Number: {
            if (cc == 'x') {
                entityState = MaybeHexLowerCaseX;
                break;
            }
            if (cc == 'X') {
                entityState = MaybeHexUpperCaseX;
                break;
            }
            if (cc >= '0' && cc <= '9') {
                entityState = Decimal;
                continue;
            }
            // Not a numeric reference: give the already consumed '#' back.
            source.push('#');
            return false;
        }
        case MaybeHexLowerCaseX: {
            if (isHexDigit(cc)) {
                entityState = Hex;
                continue;
            }
            // "#x" without a hex digit: give both characters back.
            source.push('#');
            source.push('x');
            return false;
        }
        case MaybeHexUpperCaseX: {
            if (isHexDigit(cc)) {
                entityState = Hex;
                continue;
            }
            // "#X" without a hex digit: give both characters back.
            source.push('#');
            source.push('X');
            return false;
        }
        case Hex: {
            if (cc >= '0' && cc <= '9')
                result = result * 16 + cc - '0';
            else if (cc >= 'a' && cc <= 'f')
                result = result * 16 + 10 + cc - 'a';
            else if (cc >= 'A' && cc <= 'F')
                result = result * 16 + 10 + cc - 'A';
            else {
                // End of the digit run; a terminating ';' is consumed but
                // is not required.
                if (cc == ';')
                    source.advanceAndASSERT(cc);
                return convertToUTF16(legalEntityFor(result), decodedEntity);
            }
            break;
        }
        case Decimal: {
            if (cc >= '0' && cc <= '9')
                result = result * 10 + cc - '0';
            else {
                // End of the digit run; a terminating ';' is consumed but
                // is not required.
                if (cc == ';')
                    source.advanceAndASSERT(cc);
                return convertToUTF16(legalEntityFor(result), decodedEntity);
            }
            break;
        }
        case Named: {
            HTMLEntitySearch entitySearch;
            // Feed characters to the search for as long as they still form a
            // prefix of some entity name.
            while (!source.isEmpty()) {
                cc = *source;
                entitySearch.advance(cc);
                if (!entitySearch.isEntityPrefix())
                    break;
                consumedCharacters.append(cc);
                source.advanceAndASSERT(cc);
            }
            notEnoughCharacters = source.isEmpty();
            if (notEnoughCharacters) {
                // We can't decode an entity yet because there might be a
                // longer entity that we could match if we had more data.
                unconsumeCharacters(source, consumedCharacters);
                return false;
            }
            if (!entitySearch.mostRecentMatch()) {
                ASSERT(!entitySearch.currentValue());
                unconsumeCharacters(source, consumedCharacters);
                return false;
            }
            if (entitySearch.mostRecentMatch()->length != entitySearch.currentLength()) {
                // We've consumed too many characters.  We need to walk the
                // source back to the point at which we had consumed an
                // actual entity.
                unconsumeCharacters(source, consumedCharacters);
                consumedCharacters.clear();
                const int length = entitySearch.mostRecentMatch()->length;
                const UChar* reference = entitySearch.mostRecentMatch()->entity;
                for (int i = 0; i < length; ++i) {
                    cc = *source;
                    ASSERT_UNUSED(reference, cc == *reference++);
                    consumedCharacters.append(cc);
                    source.advanceAndASSERT(cc);
                    ASSERT(!source.isEmpty());
                }
                // cc is now the first character after the matched entity.
                cc = *source;
            }
            // Accept the match if it ends in ';', or if no additional
            // allowed character was given, or if the character after it is
            // neither alphanumeric nor '='.
            if (entitySearch.mostRecentMatch()->lastCharacter() == ';'
                || !additionalAllowedCharacter
                || !(isAlphaNumeric(cc) || cc == '=')) {
                return convertToUTF16(entitySearch.mostRecentMatch()->value, decodedEntity);
            }
            unconsumeCharacters(source, consumedCharacters);
            return false;
        }
        }
        // Reached via `break` above: record and consume the current character.
        consumedCharacters.append(cc);
        source.advanceAndASSERT(cc);
    }
    // Input ran out in the middle of a reference: undo everything and ask
    // for more data.
    ASSERT(source.isEmpty());
    notEnoughCharacters = true;
    unconsumeCharacters(source, consumedCharacters);
    return false;
}
 // Fetches the next character via next() and returns it only when it is a
 // hexadecimal digit; in every other case (including a zero character from
 // next()) the result is 0.
 Char PushbackString::nextHex() {
   Char candidate = next();
   if ( !( candidate == 0 ) && isHexDigit( candidate ) )
     return candidate;
   return 0;
 }
Пример #26
0
////////////////////////////////////////////////////////////////////////////////
// Full implementation of a quoted word.  Includes:
//   '\''
//   '"'
//   "'"
//   "\""
//   'one two'
// Result includes the quotes.
//
// Parameters:
//   text    Input being scanned.
//   quotes  Characters accepted as an opening quote. The same character must
//           also close the word.
//   cursor  In/out offset into 'text'. It must index the opening quote. It is
//           advanced as input is consumed, including on a failed parse that
//           got past the opening quote.
//   word    Out. Receives the opening quote, the decoded content and, on
//           success, the closing quote.
//
// Returns true only if an opening quote was found at 'cursor' and a matching,
// unescaped closing quote was consumed. If 'cursor' does not index a quote
// character, returns false and leaves 'cursor' and 'word' untouched.
bool Lexer::readWord (
  const std::string& text,
  const std::string& quotes,
  std::string::size_type& cursor,
  std::string& word)
{
  const std::string::size_type eos = text.length ();

  // Nothing to read at or beyond the end of the input.
  if (cursor >= eos ||
      quotes.find (text[cursor]) == std::string::npos)
    return false;

  const int quote = text[cursor++];
  word = static_cast <char> (quote);

  // Set only when the real closing quote is consumed.  Inspecting the last
  // character of 'word' is not enough, because an escaped quote (\') or a
  // U+0027 codepoint at the end of the input also leaves a quote there.
  bool terminated = false;

  int c;
  // The explicit bound keeps every read inside 'text', even if 'cursor' was
  // moved past the end by the previous iteration.
  while (cursor < eos && (c = text[cursor]))
  {
    // Quoted word ends on a quote.
    if (quote && quote == c)
    {
      word += utf8_character (utf8_next_char (text, cursor));
      terminated = true;
      break;
    }

    // Unicode U+XXXX or \uXXXX codepoint.
    else if (eos - cursor >= 6 &&
             ((text[cursor + 0] == 'U'  && text[cursor + 1] == '+') ||
              (text[cursor + 0] == '\\' && text[cursor + 1] == 'u')) &&
             isHexDigit (text[cursor + 2]) &&
             isHexDigit (text[cursor + 3]) &&
             isHexDigit (text[cursor + 4]) &&
             isHexDigit (text[cursor + 5]))
    {
      word += utf8_character (
                hexToInt (
                  text[cursor + 2],
                  text[cursor + 3],
                  text[cursor + 4],
                  text[cursor + 5]));
      cursor += 6;
    }

    // An escaped thing.
    else if (c == '\\')
    {
      // A backslash that is the very last character escapes nothing.  Stop
      // here instead of stepping the cursor beyond the end of 'text'; the
      // word is unterminated.
      if (++cursor >= eos)
        break;

      c = text[cursor];

      char unescaped;
      switch (c)
      {
      case '"':  unescaped = '"';  break;
      case '\'': unescaped = '\''; break;
      case '\\': unescaped = '\\'; break;
      case 'b':  unescaped = '\b'; break;
      case 'f':  unescaped = '\f'; break;
      case 'n':  unescaped = '\n'; break;
      case 'r':  unescaped = '\r'; break;
      case 't':  unescaped = '\t'; break;
      case 'v':  unescaped = '\v'; break;

      // This pass-through default case means that anything can be escaped
      // harmlessly. In particular 'quote' is included, if it is not one of
      // the above characters.
      default:   unescaped = static_cast <char> (c); break;
      }

      word += unescaped;
      ++cursor;
    }

    // Ordinary character.
    else
      word += utf8_character (utf8_next_char (text, cursor));
  }

  // Verify termination.  When 'terminated' is set, 'word' necessarily starts
  // and ends with 'quote' and holds at least two characters.
  return terminated;
}