Exemplo n.º 1
0
// Reports whether inChar is accepted as an identifier character: it must pass
// IsIdentifierStart() and must not be any of CHAR_SOLIDUS, CHAR_LESS_THAN_SIGN,
// CHAR_GREATER_THAN_SIGN, CHAR_AMPERSAND or CHAR_EQUALS_SIGN.
bool HTMLLexer::IsIdentifierCharacter( UniChar inChar )
{
	// A character rejected by IsIdentifierStart is rejected here as well.
	if (!IsIdentifierStart( inChar ))
		return false;

	// Of the remaining characters, only the excluded ones listed below are refused.
	return inChar != CHAR_SOLIDUS
		&& inChar != CHAR_LESS_THAN_SIGN
		&& inChar != CHAR_GREATER_THAN_SIGN
		&& inChar != CHAR_AMPERSAND
		&& inChar != CHAR_EQUALS_SIGN;
}
Exemplo n.º 2
0
bool HTMLLexer::AdvanceOneToken( int &outToken, TokenList *outTokens )
{
	outToken = -1;

	if (!fLexerInput)
		return false;

	// There can be any combination of newlines and whitespaces preceeding semantic tokens,
	// so we're going to loop until we don't find either.
	while (fLexerInput->HasMoreChars())
	{
		bool consumedWhitespace = false;
		bool consumedNewLine = false;
		if (!SkipWhitespaces( consumedWhitespace, outTokens ))
			return false;

		// HTML also treats newlines as a whitespace
		while (fLexerInput->HasMoreChars() && IsLineEnding( fLexerInput->PeekAtNextChar() ))
		{
			// Eat the line ending
			ConsumeLineEnding( fLexerInput->MoveToNextChar() );
			consumedNewLine = true;
		}

		// If we're done consuming newlines and whitespaces, then we're done with this loop
		if (!consumedWhitespace && !consumedNewLine)
			break;
	}
	if (!fLexerInput->HasMoreChars())
		return false;

	// Take a peek at what sort of token we're about to deal with. 
	UniChar	uChar = fLexerInput->PeekAtNextChar();
	sLONG stringType;
	sLONG stringValue;

	if( (outToken = ConsumePossiblePunctuation(uChar, outTokens)) != 0 )
	{
	}
	else if (IsStringStart( uChar, stringType, stringValue ))
	{
		VString	vstrQuoted;
		if (!ConsumeString( &vstrQuoted, outTokens, stringType, stringValue ))
		{
			return false;
		}
		outToken = stringValue;
		fLastTokenText = vstrQuoted;
	}
	else if (IsIdentifierStart( uChar ))
	{
		// The base class assumes we've consumed the first character already for this call.  We should
		// rectify this some day, as it's very confusing.
		fLexerInput->MoveToNextChar();
		VString *vstrNAME = ConsumeIdentifier();
		if (!vstrNAME)
		{
			return false;
		}

		outToken = HTMLLexemes::TEXT;
		if (outTokens)	outTokens->push_back( new HTMLLexerToken( ILexerToken::TT_NAME, fLexerInput->GetCurrentPosition() - vstrNAME->GetLength(), vstrNAME->GetLength(), *vstrNAME, outToken ) );

		fLastTokenText = *vstrNAME;
		delete vstrNAME;
	}
	else
	{
		return false;
	}
	
	SetLastToken( outToken );
	return true;
}
// Reads one lexem from the character range [start, end) into `out`.
//
//  start  first character to examine
//  end    one past the last readable character; never dereferenced
//  out    receives the lexem's Value and/or Type, depending on the branch taken
//
// Returns the position at which the next call should resume. When start == end
// nothing is read, `out` is left untouched and `start` is returned.
char* Preprocessor::ParseLexem( char* start, char* end, Lexem& out )
{
    if( start == end )
        return start;
    char current_char = *start;

    // Single-character lexems listed in Trivials map directly to a type.
    if( IsTrivial( current_char ) )
    {
        out.Value += current_char;
        out.Type = TrivialTypes[Trivials.find_first_of( current_char )];
        return ++start;
    }

    if( IsIdentifierStart( current_char ) )
        return ParseIdentifier( start, end, out );

    if( current_char == '#' )
    {
        out.Value = "#";
        ++start;
        // '#' may be the very last character of the input, so make sure there
        // is a following character before looking at it.
        if( start != end && *start == '#' )
        {
            out.Value = "##";
            out.Type = Lexem::IGNORE;
            return ( ++start );
        }
        // Blanks are allowed between '#' and the directive name.
        while( start != end && ( *start == ' ' || *start == '\t' ) )
            ++start;
        if( start != end && IsIdentifierStart( *start ) )
            start = ParseIdentifier( start, end, out );
        out.Type = Lexem::PREPROCESSOR;
        return start;
    }

    if( IsNumber( current_char ) )
        return ParseNumber( start, end, out );
    if( current_char == '\"' )
        return ParseStringLiteral( start, end, '\"', out );
    if( current_char == '\'' )
        return ParseStringLiteral( start, end, '\'', out );  // Todo: set optional ParseCharacterLiteral?
    if( current_char == '/' )
    {
        // Need to see if it's a comment. The comment parsers are handed the
        // position of the character that follows the leading '/'.
        char* next = start + 1;
        if( next != end )
        {
            if( *next == '*' )
                return ParseBlockComment( next, end, out );
            if( *next == '/' )
                return ParseLineComment( next, end, out );
        }
        // Not a comment (this includes a '/' that ends the input) - let the
        // default code below record it, so the character is never dropped
        // without `out` being filled in.
    }
    if( current_char == '\\' )
    {
        out.Type = Lexem::BACKSLASH;
        return ++start;
    }

    // Anything else becomes a one-character lexem of type IGNORE.
    out.Value = std::string( 1, current_char );
    out.Type = Lexem::IGNORE;
    return ++start;
}