示例#1
0
// Scans an identifier starting at p.
//   p                 - in/out cursor; advanced past the identifier on success,
//                       left unchanged on failure.
//   str               - receives the identifier text on success.
//   pszExtraLeadChars - forwarded to IsIdentifierLeadChar for the first character.
//   pszExtraChars     - forwarded to IsIdentifierChar for the remaining characters.
// Returns true if an identifier was consumed, false if p[0] is not a valid
// lead character.
bool SIMPLEAPI SkipIdentifier(const wchar_t*& p, CUniString& str, const wchar_t* pszExtraLeadChars, const wchar_t* pszExtraChars)
{
	// Nothing to scan unless the first character can open an identifier
	if (!IsIdentifierLeadChar(*p, pszExtraLeadChars))
		return false;

	// Remember where the identifier begins, then step over the lead character
	const wchar_t* const pszStart = p++;

	// Consume every following identifier character
	while (IsIdentifierChar(*p, pszExtraChars))
		++p;

	// Hand back the text in [pszStart, p)
	str = CUniString(pszStart, int(p - pszStart));
	return true;
}
示例#2
0
// Tries to read a label (an identifier) at the current position.
// Returns the label's name, or an empty string if there is none.
//
// requireColon == true:  the identifier must be followed by ':'. On a match
//                        the cursor moves past the colon; otherwise the cursor
//                        is left alone and an empty string is returned.
// requireColon == false: the cursor moves past whatever identifier was found
//                        (possibly nothing).
std::string SymFile::GetLabel(bool requireColon)
{
    const long start = m_pos;
    long end = start;

    // Scan the identifier: one starting character, then any number of
    // identifier characters.
    if (IsIdentifierStartingChar(m_buffer[end]))
    {
        do
        {
            end++;
        } while (IsIdentifierChar(m_buffer[end]));
    }

    if (!requireColon)
    {
        m_pos = end;
    }
    else if (m_buffer[end] != ':')
    {
        // No colon: report "no label" by collapsing the range to empty.
        end = start;
    }
    else if (end != start)
    {
        // Non-empty identifier followed by a colon: consume both.
        m_pos = end + 1;
    }

    return std::string(&m_buffer[start], end - start);
}
示例#3
0
// Case-insensitive test of whether the text at p begins with psz.
//   p        - text to examine (not advanced).
//   psz      - pattern to look for; a null pattern never matches.
//   bEndWord - when true, the character right after the match must not be
//              an identifier character.
// Returns true on a match, false otherwise.
bool SIMPLEAPI DoesMatchI(const wchar_t* p, const wchar_t* psz, bool bEndWord)
{
	if (!psz)
		return false;

	// Compare only as many characters as the pattern has
	const size_t cchPattern=wcslen(psz);
	if (_wcsnicmp(p, psz, cchPattern)!=0)
		return false;

	// Reject matches that run straight into more identifier characters
	if (bEndWord && IsIdentifierChar(p[cchPattern]))
		return false;

	return true;
}
示例#4
0
// Case-sensitive test of whether the text at p begins with psz; on a match
// the cursor is moved past the matched text.
//   p        - in/out cursor; advanced by the pattern length on success,
//              left unchanged on failure.
//   psz      - pattern to look for; a null pattern never matches.
//   bEndWord - when true, the character right after the match must not be
//              an identifier character.
// Returns true on a match, false otherwise.
bool SIMPLEAPI SkipMatch(const wchar_t*& p, const wchar_t* psz, bool bEndWord)
{
	if (!psz)
		return false;

	// Compare only as many characters as the pattern has
	const size_t cchPattern=wcslen(psz);
	if (wcsncmp(p, psz, cchPattern)!=0)
		return false;

	// Reject matches that run straight into more identifier characters
	if (bEndWord && IsIdentifierChar(p[cchPattern]))
		return false;

	// Matched - step over it
	p+=cchPattern;
	return true;
}
示例#5
0
void CFile::TryConvertString()
{
    long oldPos = m_pos;
    long oldLineNum = m_lineNum;
    bool noTerminator = false;

    if (m_buffer[m_pos] != '_' || (m_pos > 0 && IsIdentifierChar(m_buffer[m_pos - 1])))
        return;

    m_pos++;

    if (m_buffer[m_pos] == '_')
    {
        noTerminator = true;
        m_pos++;
    }

    SkipWhitespace();

    if (m_buffer[m_pos] != '(')
    {
        m_pos = oldPos;
        m_lineNum = oldLineNum;
        return;
    }

    m_pos++;

    SkipWhitespace();

    std::printf("{ ");

    while (1)
    {
        SkipWhitespace();

        if (m_buffer[m_pos] == '"')
        {
            unsigned char s[kMaxStringLength];
            int length;
            StringParser stringParser(m_buffer, m_size);

            try
            {
                m_pos += stringParser.ParseString(m_pos, s, length);
            }
            catch (std::runtime_error& e)
            {
                RaiseError(e.what());
            }

            for (int i = 0; i < length; i++)
                printf("0x%02X, ", s[i]);
        }
        else if (m_buffer[m_pos] == ')')
        {
            m_pos++;
            break;
        }
        else
        {
            if (m_pos >= m_size)
                RaiseError("unexpected EOF");
            if (IsAsciiPrintable(m_buffer[m_pos]))
                RaiseError("unexpected character '%c'", m_buffer[m_pos]);
            else
                RaiseError("unexpected character '\\x%02X'", m_buffer[m_pos]);
        }
    }

    if (noTerminator)
        std::printf(" }");
    else
        std::printf("0xFF }");
}
示例#6
0
TOKENID CLexer::ScanToken (CSTOKEN *pToken)
{
    WCHAR       ch, chQuote, chSurrogate = L'\0';
    PCWSTR      p = m_pszCurrent, pszHold = NULL, pszToken;
    BOOL        fReal = FALSE, fEscaped = FALSE, fAtPrefix = FALSE;

    // Initialize for new token scan
    pToken->iChar = pToken->iLine = 0;
    pToken->iUserByte = TID_INVALID;
    pToken->iUserBits = 0;

    // Start scanning the token
    while (pToken->iUserByte == TID_INVALID)
    {
        if (!PositionOf (p, pToken) && !m_fThisLineTooLong)
        {
            ErrorAtPosition (m_iCurLine, MAX_POS_LINE_LEN - 1, 1, ERR_LineTooLong, MAX_POS_LINE_LEN);
            m_fLimitExceeded = TRUE;
            m_fThisLineTooLong = TRUE;
        }

        pszToken = p;

        switch (ch = *p++)
        {
            case 0:
            {
                // Back up to point to the 0 again...
                p--;      
                pToken->iUserByte = TID_ENDFILE;
                pToken->iLength = 0;
                break;
            }

            case '\t':
            case ' ':
            {
                // Tabs and spaces tend to roam in groups... scan them together
                while (*p == ' ' || *p == '\t')
                    p++;
                break;
            }

            case UCH_PS:
            case UCH_LS:
            case 0x0085:
            case '\n':
            {
                // This is a new line
                TrackLine (p);
                break;
            }

            case '\r':
            {
                // Bare CR's are lines, but CRLF pairs are considered a single line.
                if (*p == '\n')
                    p++;
                TrackLine (p);
                break;
            }

            // Other Whitespace characters
            case UCH_BOM:   // Unicode Byte-order marker
            case 0x001A:    // Ctrl+Z
            case '\v':      // Vertical Tab
            case '\f':      // Form-feed
            {
                break;
            }

            case '#':
            {
                p--;
                if (!ScanPreprocessorLine (p))
                {
                    ASSERT(!m_fPreproc);
                    p++;
                    ReportInvalidToken(pToken, pszToken, p);
                }
                break;
            }

            case '\"':
            case '\'':
            {
                CStringBuilder  sb;

                // "Normal" strings (double-quoted and single-quoted (char) literals).  We translate escape sequences
                // here, and construct the STRCONST (for strings) directly (char literals are encoded w/o overhead)
                chQuote = ch;
                while (*p != chQuote)
                {
                    WCHAR   c = *p++;

                    if (c == '\\')
                    {
                        WCHAR c2 = 0;
                        c = ScanEscapeSequence (p, &c2);

                        // We use a string building to construct the string constant's value.  Yes, CStringBuilder
                        // is equipped to deal with embedded nul characters.
                        sb.Append (c);
                        if (c2 != 0)
                            sb.Append (c2);
                    }
                    else if (IsEndOfLineChar (c) || c == 0)
                    {
                        ASSERT (p > pszToken);
                        p--;
                        ErrorAtPosition (m_iCurLine, (long)(pszToken - m_pszCurLine), (long)(p - pszToken), ERR_NewlineInConst);
                        pToken->iUserBits |= TF_UNTERMINATED;
                        break;
                    }
                    else
                    {
                        // We use a string building to construct the string constant's value.  Yes, CStringBuilder
                        // is equipped to deal with embedded nul characters.
                        sb.Append (c);
                    }
                }

                // Skip the terminating quote (if present)
                if ((pToken->iUserBits & TF_UNTERMINATED) == 0)
                    p++;

                if (chQuote == '\'')
                {
                    // This was a char literal -- no need to allocate overhead...
                    if (sb.GetLength() != 1)
                        ErrorAtPosition (m_iCurLine, (long)(pszToken - m_pszCurLine), (long)(p - pszToken), (sb.GetLength() != 0) ? ERR_TooManyCharsInConst : ERR_EmptyCharConst);

                    pToken->iUserByte = TID_CHARLIT;
                    pToken->chr.cCharValue = ((PCWSTR)sb)[0];
                    pToken->chr.iCharLen = (WCHAR)(p - pszToken);
                }
                else
                {
                    // This one requires special allocation.
                    pToken->iUserByte = TID_STRINGLIT;
                    pToken->iUserBits |= TF_OVERHEAD;
                    pToken->pStringLiteral = (STRLITERAL *)TokenMemAlloc (pToken, sizeof (STRLITERAL) + (sb.GetLength() * sizeof (WCHAR)));
                    pToken->pStringLiteral->iSourceLength = (long)(p - pszToken);
                    pToken->pStringLiteral->str.length = (long)sb.GetLength();
                    pToken->pStringLiteral->str.text = (WCHAR *)(pToken->pStringLiteral + 1);
                    memcpy (pToken->pStringLiteral->str.text, (PCWSTR)sb, pToken->pStringLiteral->str.length * sizeof (WCHAR));
                }

                break;
            }

            case '/':
            {
                // Lotsa things start with slash...
                switch (*p)
                {
                    case '/':
                    {
                        // Single-line comments...
                        bool    fDocComment = (p[1] == '/' && p[2] != '/');

                        // Find the end of the line, and make sure it's not too long (even for non-doc comments...)
                        while (*p != 0 && !IsEndOfLineChar (*p)) 
                        {
                            if (p - m_pszCurLine >= MAX_POS_LINE_LEN && !m_fThisLineTooLong)
                            {
                                ErrorAtPosition (m_iCurLine, MAX_POS_LINE_LEN - 1, 1, ERR_LineTooLong, MAX_POS_LINE_LEN);
                                m_fLimitExceeded = TRUE;
                                m_fThisLineTooLong = TRUE;
                            }

                            p++;
                        }

                        // Only put comments in the token stream if asked
                        if (RepresentNoiseTokens ())
                        {                            
                            if (fDocComment)
                            {
                                size_t cchToken = (p - pszToken);
                                size_t cchBuffer = cchToken + 1;
                                size_t cbBuffer = cchBuffer * sizeof(WCHAR);

                                // Doc comments require, ironically enough, overhead in the token stream.
                                pToken->iUserByte = TID_DOCCOMMENT;
                                pToken->iUserBits |= TF_OVERHEAD;
                                pToken->pDocLiteral = (DOCLITERAL *)TokenMemAlloc (pToken, sizeof (DOCLITERAL) + cbBuffer);
                                pToken->pDocLiteral->posEnd = POSDATA(m_iCurLine, (long)(p - m_pszCurLine));
                                wcsncpy_s (pToken->pDocLiteral->szText, cchBuffer, pszToken, cchToken);
                                pToken->pDocLiteral->szText[cchToken] = 0;
                            }
                            else
                            {
                                // No overhead incurred for single-line non-doc comments, but we do need the length.
                                pToken->iUserByte = TID_SLCOMMENT;
                                pToken->iLength = (long)(p - pszToken);
                            }
                        }
                        break;
                    }

                    case '*':
                    {
                        bool    fDocComment = (p[1] == '*' && p[2] != '*');
                        BOOL    fDone = FALSE;

                        // Multi-line comments...
                        p++;
                        while (!fDone)
                        {
                            if (*p == 0)
                            {
                                // The comment didn't end.  Report an error at the start point.
                                ErrorAtPosition (pToken->iLine, pToken->iChar, 2, ERR_OpenEndedComment);
                                if (RepresentNoiseTokens ())
                                    pToken->iUserBits |= TF_UNTERMINATED;
                                fDone = TRUE;
                                break;
                            }

                            if (*p == '*' && p[1] == '/')
                            {
                                p += 2;
                                break;
                            }

                            if (IsEndOfLineChar (*p))
                            {
                                if (*p == '\r' && p[1] == '\n')
                                    p++;
                                TrackLine (++p);
                            }
                            else
                            {
                                p++;
                            }
                        }

                        m_fFirstOnLine = FALSE;

                        if (RepresentNoiseTokens ())
                        {
                            pToken->iUserBits |= TF_OVERHEAD;
                            if (fDocComment)
                            {
                                // Doc comments require, ironically enough, overhead in the token stream.
                                size_t cchToken = (p - pszToken);
                                size_t cchBuffer = cchToken + 1; //+1 for null
                                size_t cbBuffer = cchBuffer * sizeof(WCHAR);

                                pToken->iUserByte = TID_MLDOCCOMMENT;
                                pToken->pDocLiteral = (DOCLITERAL *)TokenMemAlloc (pToken, sizeof (DOCLITERAL) + cbBuffer);
                                pToken->pDocLiteral->posEnd = POSDATA(m_iCurLine, (long)(p - m_pszCurLine));
                                wcsncpy_s (pToken->pDocLiteral->szText, cchBuffer, pszToken, cchToken);
                                pToken->pDocLiteral->szText[cchToken] = 0;
                                if (p - m_pszCurLine >= MAX_POS_LINE_LEN && !m_fThisLineTooLong)
                                {
                                    ErrorAtPosition (m_iCurLine, MAX_POS_LINE_LEN - 1, 1, ERR_LineTooLong, MAX_POS_LINE_LEN);
                                    m_fLimitExceeded = TRUE;
                                    m_fThisLineTooLong = TRUE;
                                }
                            }
                            else
                            {
                                // For multi-line comments, we don't put the text in but we do need the
                                // end position -- which means ML comments incur overhead...  :-(
                                pToken->iUserByte = TID_MLCOMMENT;
                                pToken->pposEnd = (POSDATA *)TokenMemAlloc (pToken, sizeof (POSDATA));
                                if (!PositionOf (p, pToken->pposEnd) && !m_fThisLineTooLong)
                                {
                                    ErrorAtPosition (m_iCurLine, MAX_POS_LINE_LEN - 1, 1, ERR_LineTooLong, MAX_POS_LINE_LEN);
                                    m_fLimitExceeded = TRUE;
                                    m_fThisLineTooLong = TRUE;
                                }
                            }

                        }
                        break;
                    }

                    case '=':
                    {
                        p++;
                        pToken->iUserByte = TID_SLASHEQUAL;
                        pToken->iLength = 2;
                        break;
                    }

                    default:
                    {
                        pToken->iUserByte = TID_SLASH;
                        pToken->iLength = 1;
                        break;
                    }
                }

                break;
            }

            case '.':
            {
                if (*p >= '0' && *p <= '9')
                {
                    p++;
                    ch = 0;
                    goto _parseNumber;
                }
                pToken->iUserByte = TID_DOT;
                pToken->iLength = 1;
                break;
            }

            case ',':
                pToken->iUserByte = TID_COMMA;
                pToken->iLength = 1;
                break;

            case ':':
                if (*p == ':')
                {
                    pToken->iUserByte = TID_COLONCOLON;
                    pToken->iLength = 2;
                    p++;
                }
                else
                {
                    pToken->iUserByte = TID_COLON;
                    pToken->iLength = 1;
                }
                break;

            case ';':
                pToken->iUserByte = TID_SEMICOLON;
                pToken->iLength = 1;
                break;

            case '~':
                pToken->iUserByte = TID_TILDE;
                pToken->iLength = 1;
                break;

            case '!':
            {
                if (*p == '=')
                {
                    pToken->iUserByte = TID_NOTEQUAL;
                    pToken->iLength = 2;
                    p++;
                }
                else
                {
                    pToken->iUserByte = TID_BANG;
                    pToken->iLength = 1;
                }
                break;
            }

            case '=':
            {
                if (*p == '=')
                {
                    pToken->iUserByte = TID_EQUALEQUAL;
                    pToken->iLength = 2;
                    p++;
                }
                else
                {
                    pToken->iUserByte = TID_EQUAL;
                    pToken->iLength = 1;
                }
                break;
            }

            case '*':
            {
                if (*p == '=')
                {
                    pToken->iUserByte = TID_SPLATEQUAL;
                    pToken->iLength = 2;
                    p++;
                }
                else
                {
                    pToken->iUserByte = TID_STAR;
                    pToken->iLength = 1;
                }
                break;
            }

            case '(':
            {
                pToken->iUserByte = TID_OPENPAREN;
                pToken->iLength = 1;
                break;
            }

            case ')':
            {
                pToken->iUserByte = TID_CLOSEPAREN;
                pToken->iLength = 1;
                break;
            }

            case '{':
            {
                pToken->iUserByte = TID_OPENCURLY;
                pToken->iLength = 1;
                break;
            }

            case '}':
            {
                pToken->iUserByte = TID_CLOSECURLY;
                pToken->iLength = 1;
                break;
            }

            case '[':
            {
                pToken->iUserByte = TID_OPENSQUARE;
                pToken->iLength = 1;
                break;
            }

            case ']':
            {
                pToken->iUserByte = TID_CLOSESQUARE;
                pToken->iLength = 1;
                break;
            }

            case '?':
            {
                if (*p == '?')
                {
                    p++;
                    pToken->iUserByte = TID_QUESTQUEST;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_QUESTION;
                    pToken->iLength = 1;
                }
                break;
            }

            case '+':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_PLUSEQUAL;
                    pToken->iLength = 2;
                }
                else if (*p == '+')
                {
                    p++;
                    pToken->iUserByte = TID_PLUSPLUS;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_PLUS;
                    pToken->iLength = 1;
                }
                break;
            }

            case '-':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_MINUSEQUAL;
                    pToken->iLength = 2;
                }
                else if (*p == '-')
                {
                    p++;
                    pToken->iUserByte = TID_MINUSMINUS;
                    pToken->iLength = 2;
                }
                else if (*p == '>')
                {
                    p++;
                    pToken->iUserByte = TID_ARROW;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_MINUS;
                    pToken->iLength = 1;
                }
                break;
            }

            case '%':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_MODEQUAL;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_PERCENT;
                    pToken->iLength = 1;
                }
                break;
            }

            case '&':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_ANDEQUAL;
                    pToken->iLength = 2;
                }
                else if (*p == '&')
                {
                    p++;
                    pToken->iUserByte = TID_LOG_AND;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_AMPERSAND;
                    pToken->iLength = 1;
                }
                break;
            }

            case '^':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_HATEQUAL;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_HAT;
                    pToken->iLength = 1;
                }
                break;
            }

            case '|':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_BAREQUAL;
                    pToken->iLength = 2;
                }
                else if (*p == '|')
                {
                    p++;
                    pToken->iUserByte = TID_LOG_OR;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_BAR;
                    pToken->iLength = 1;
                }
                break;
            }

            case '<':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_LESSEQUAL;
                    pToken->iLength = 2;
                }
                else if (*p == '<')
                {
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        pToken->iUserByte = TID_SHIFTLEFTEQ;
                        pToken->iLength = 3;
                    }
                    else
                    {
                        pToken->iUserByte = TID_SHIFTLEFT;
                        pToken->iLength = 2;
                    }
                }
                else
                {
                    pToken->iUserByte = TID_LESS;
                    pToken->iLength = 1;
                }
                break;
            }

            case '>':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_GREATEREQUAL;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_GREATER;
                    pToken->iLength = 1;
                }
                break;
            }

            case '@':
            {
                if (*p == '"')
                {
                    CStringBuilder  sb;
                    BOOL            fDone = FALSE;
                    WCHAR           c;

                    // Verbatim string literal.  While scanning/accumulating its value into
                    // the string builder, track lines and ignore escape characters (they don't
                    // apply in VSL's) -- watch for double-quotes as well.
                    p++;
                    while (!fDone)
                    {
                        switch (c = *p++)
                        {
                            case UCH_PS:
                            case UCH_LS:
                            case 0x0085:
                            case '\n':
                            {
                                TrackLine (p);
                                break;
                            }

                            case '\r':
                            {
                                if (*p == '\n')
                                {
                                    sb.Append (c);
                                    c = *p++;
                                }
                                TrackLine (p);
                                break;
                            }

                            case '\"':
                            {
                                if (*p == '\"')
                                    p++;            // Doubled quote -- skip & put the single quote in the string
                                else
                                    fDone = TRUE;
                                break;
                            }

                            case 0:
                            {
                                // Reached the end of the source without finding the end-quote.  Give
                                // an error back at the starting point.
                                ErrorAtPosition (pToken->iLine, pToken->iChar, 2, ERR_UnterminatedStringLit);
                                pToken->iUserBits |= TF_UNTERMINATED;
                                fDone = TRUE;
                                p--;
                                break;
                            }
                            default:
                                ASSERT(!IsEndOfLineChar(c));
                                break;
                        }

                        if (!fDone)
                            sb.Append (c);
                    }

                    pToken->iUserByte = TID_VSLITERAL;
                    pToken->iUserBits |= TF_OVERHEAD;
                    pToken->pVSLiteral = (VSLITERAL *)TokenMemAlloc (pToken, sizeof (VSLITERAL) + (sb.GetLength() * sizeof (WCHAR)));
                    PositionOf (p, &pToken->pVSLiteral->posEnd);
                    pToken->pVSLiteral->str.length = (long)sb.GetLength();
                    pToken->pVSLiteral->str.text = (WCHAR *)(pToken->pVSLiteral + 1);
                    memcpy (pToken->pVSLiteral->str.text, (PCWSTR)sb, sb.GetLength() * sizeof (WCHAR));
                    break;
                }

                // Check for identifiers.  NOTE: unicode escapes are allowed here!
                ch = PeekChar(p, &chSurrogate); 
                if (!IsIdentifierChar (ch)) // BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                {
                    // After the '@' we have neither an identifier nor and string quote, so assume it is an identifier.
                    CreateInvalidToken(pToken, pszToken, p);
                    ErrorAtPosition (m_iCurLine, (long)(pszToken - m_pszCurLine), (long)(p - pszToken), ERR_ExpectedVerbatimLiteral);
                    break;
                }

                ch = NextChar(p, &chSurrogate);
                fAtPrefix = TRUE;
                goto _ParseIdentifier;  // (Goto avoids the IsSpaceSeparator() check and the redundant IsIdentifierChar() check below...)
            }

            case '\\':
                // Could be unicode escape. Try that.
                --p;
                ch = NextChar (p, &chSurrogate);

                // If we had a unicode escape, ch is it. If we didn't, ch is still a backslash. Unicode escape
                // must start an identifers, so check only for identifiers now.
                goto _CheckIdentifier;

            default:
                ASSERT(!IsEndOfLineChar(ch));
                if (IsSpaceSeparator (ch))    // Unicode class 'Zs'
                {
                    while (IsSpaceSeparator(*p))
                        p++;
                    break;
                }
_CheckIdentifier:
                if (!IsIdentifierChar (ch)) // BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                {
                    ReportInvalidToken(pToken, pszToken, p);
                    break;
                }
                // Fall through case.  All the 'common' identifier characters are represented directly in
                // these switch cases for optimal perf.  Calling IsIdentifierChar() functions is relatively
                // expensive.
            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
            case '_':
_ParseIdentifier:
            {
                CStringBuilder  sb;
                bool            doubleUnderscore = false;

                // Remember, because we're processing identifiers here, unicode escape sequences are
                // allowed and must be handled
                sb.Append (ch);
                if (chSurrogate)
                    sb.Append(chSurrogate);

                do
                {
                    ch = PeekChar (p, &chSurrogate);
                    switch (ch)
                    {
                        case '_':
                            // Common identifier character, but we need check for double consecutive underscores
                            if (!doubleUnderscore && ((PWSTR)sb)[sb.GetLength() - 1] == '_')
                                doubleUnderscore = true;
                            break;

                        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
                        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
                        case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
                        {
                            // Again, these are the 'common' identifier characters...
                            break;
                        }
                        case ' ': case '\t': case '.': case ';': case '(': case ')': case ',':
                        {
                            // ...and these are the 'common' stop characters.
                            goto LoopExit;
                        }
                        default:
                        {
                            // This is the 'expensive' call
                            if (IsIdentifierCharOrDigit (ch)) // BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                            {
                                if (IsOtherFormat (ch))
                                {
                                    goto SkipChar; // Ignore formatting characters
                                }
                            }
                            else
                            {
                                // Not a valid identifier character, so bail.
                                goto LoopExit;
                            }
                        }
                    }
                    sb.Append (ch);
                    if (chSurrogate)
                        sb.Append(chSurrogate);

SkipChar:
                    ch = NextChar (p, &chSurrogate);
                }
                while (ch);

LoopExit:
                HRESULT hr;
                if (!SUCCEEDED(hr = sb.GetResultCode()))
                {
                    m_hr = hr;
                    return TID_INVALID;
                }

                PCWSTR  pszName = sb;
                long    iLength = (long)sb.GetLength();

                // "escaped" means there was an @ prefix, or there was a unicode escape -- both of which
                // indicate overhead, since the identifier length will not be equal to the token length
                fEscaped = (fAtPrefix || (p - pszToken > iLength));

                if (sb.GetLength() >= MAX_IDENT_SIZE)
                {
                    ErrorAtPosition (m_iCurLine, (long)(pszToken - m_pszCurLine), (long)(p - pszToken), ERR_IdentifierTooLong);
                    iLength = MAX_IDENT_SIZE - 1;
                }

                int     iKeyword;

                // Add the identifier to the name table
                pToken->pName = m_pNameMgr->AddString (pszName, iLength);

                // ...and check to see if it is a keyword, if appropriate
                if (fEscaped || !m_pNameMgr->IsNameKeyword (pToken->pName, m_eKeywordMode, &iKeyword))
                {
                    pToken->iUserByte = TID_IDENTIFIER;

                    if (doubleUnderscore && !fAtPrefix && m_eKeywordMode == CompatibilityECMA1) {
                        ErrorAtPosition (m_iCurLine, (long)(pszToken - m_pszCurLine), (long)(p - pszToken), ERR_ReservedIdentifier, pToken->pName->text);
                    }

                    if (fEscaped)
                    {
                        NAME    *pName = pToken->pName;     // Hold this so assignment to pEscName doesn't whack it

                        pToken->iUserBits |= TF_OVERHEAD;
                        pToken->pEscName = (ESCAPEDNAME *)TokenMemAlloc (pToken, sizeof (ESCAPEDNAME));
                        pToken->pEscName->iLen = (long)(p - pszToken);
                        pToken->pEscName->pName = pName;
                    }
                }
                else
                {
                    pToken->iUserByte = iKeyword;
                    pToken->iLength = iLength;
                }
                
                if (fAtPrefix)
                {
                    pToken->iUserBits |= TF_VERBATIMSTRING; // We need to know this later
                }

                break;
            }

            case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
            {
                BOOL fHexNumber;
                if ((fHexNumber = (ch == '0' && (*p == 'x' || *p == 'X'))))
                {
                    // it's a hex constant
                    p++;

                    // It's OK if it has no digits after the '0x' -- we'll catch it in ScanNumericLiteral
                    // and give a proper error then.
                    while (*p <= 'f' && isxdigit (*p))
                        p++;

                    if (*p == 'L' || *p == 'l')
                    {
                        p++;
                        if (*p == 'u' || *p == 'U')
                            p++;
                    }
                    else if (*p == 'u' || *p == 'U')
                    {
                        p++;
                        if (*p == 'L' || *p == 'l')
                            p++;
                    }
                }
                else
                {
                    // skip digits
                    while (*p >= '0' && *p <= '9')
                        p++;
                    if (*p == '.')
                    {
                        pszHold = p++;
                        if (*p >= '0' && *p <= '9')
                        {
                            // skip digits after decimal point
                            p++;
    _parseNumber:
                            fHexNumber = false;
                            fReal = TRUE;
                            while (*p >= '0' && *p <= '9')
                                p++;
                        }
                        else
                        {
                            // Number + dot + non-digit -- these are separate tokens, so don't absorb the
                            // dot token into the number.
                            p = pszHold;
                            size_t cchToken = (p - pszToken);
                            size_t cchBuffer = cchToken + 1;
                            size_t cbBuffer = cchBuffer * sizeof(WCHAR); 

                            pToken->iUserByte = TID_NUMBER;
                            pToken->iUserBits |= TF_OVERHEAD;
                            pToken->pLiteral = (LITERAL *)TokenMemAlloc (pToken, sizeof (LITERAL) + cbBuffer);
                            pToken->pLiteral->iSourceLength = (long)cchToken;
                            wcsncpy_s (pToken->pLiteral->szText, cchBuffer, pszToken, cchToken);
                            pToken->pLiteral->szText[cchBuffer] = 0;
                            break;
                        }
                    }

                    if (*p == 'E' || *p == 'e')
                    {
                        fReal = TRUE;

                        // skip exponent
                        p++;
                        if (*p == '+' || *p == '-')
                            p++;

                        while (*p >= '0' && *p <= '9')
                            p++;
                    }

                    if (fReal)
                    {
                        if (*p == 'f' || *p == 'F' || *p == 'D' || *p == 'd' || *p == 'm' || *p == 'M')
                            p++;
                    }
                    else if (*p == 'F' || *p == 'f' || *p == 'D' || *p == 'd' || *p == 'm' || *p == 'M')
                    {
                        p++;
                    }
                    else if (*p == 'L' || *p == 'l')
                    {
                        p++;
                        if (*p == 'u' || *p == 'U')
                            p++;
                    }
                    else if (*p == 'u' || *p == 'U')
                    {
                        p++;
                        if (*p == 'L' || *p == 'l')
                            p++;
                    }
                }
                size_t cchToken = (p - pszToken);
                size_t cchBuffer = cchToken + 1;
                size_t cbBuffer = cchBuffer * sizeof (WCHAR);

                pToken->iUserByte = TID_NUMBER;
                pToken->iUserBits |= TF_OVERHEAD;
                if (fHexNumber) 
                    pToken->iUserBits |= TF_HEXLITERAL;
                pToken->pLiteral = (LITERAL *)TokenMemAlloc (pToken, sizeof (LITERAL) + cbBuffer); 
                pToken->pLiteral->iSourceLength = (long)(cchToken);
                wcsncpy_s (pToken->pLiteral->szText, cchBuffer, pszToken, cchToken);
                pToken->pLiteral->szText[cchToken] = 0;
                break;
            }
        } // switch
    } // while

    m_pszCurrent = p;
    m_fFirstOnLine = FALSE;
    if (!m_fTokensSeen)
        m_fTokensSeen = ((CParser::m_rgTokenInfo[pToken->Token()].dwFlags & TFF_NOISE) == 0);
    return pToken->Token();
}
示例#7
0
BabelTokens CLexer::NextToken (CString *retsToken, Options iMode)

//	NextToken
//
//	Returns the next token in the stream.
//
//	retsToken: If not NULL, receives a copy of the token text (m_sToken).
//	iMode: If modeCode, scanning starts in stStartCode and the lexer
//		returns a single tkCode token containing everything up to (but
//		not including) the closing brace that matches the block we are
//		already inside. Any other mode starts in stStart and scans an
//		ordinary token.
//
//	The return value is the token type, which is also left in m_iToken.
//	m_iToken starts out as tkEOS, so if the buffer is exhausted
//	(m_pPos == m_pEndPos) before any state has assigned a token type,
//	tkEOS is returned. Note that this includes the case where the buffer
//	ends in the middle of a token (e.g., an identifier that runs right up
//	to m_pEndPos).
//
//	The lexer is a character-at-a-time state machine. After each state
//	handler runs, the current character is consumed unless the handler
//	selected stDoneKeepNextChar, which means "the token ended before this
//	character; leave it for the next call". m_iLineNumber is incremented
//	for every '\n' that is consumed.

	{
	enum States
		{
		stStart,
		stStartCode,
		stDone,
		stDoneKeepNextChar,

		stCheckForDoubleColon,
		stCheckForComment,
		stIdentifier,
		stLineComment,
		stBlockComment,
		stCheckForEndBlockComment,
		stQuotedString,
		stEscapeQuote,
		stLineBlock,
		stCheckForEndLineBlock,
		stParagraph,
		stCheckForEndParagraph,
		stCheckForNegativeNumber,
		stInteger,
		stHexInteger,

		stCode,
		stCodeCheckForComment,
		stCodeLineComment,
		stCodeBlockComment,
		stCodeBlockCommentEnd,
		};

	//	Start in a different state depending on the mode

	States iState;
	if (iMode == modeCode)
		iState = stStartCode;
	else
		iState = stStart;

	//	Initialize

	char *pStart;
	m_sToken = NULL;
	m_iToken = tkEOS;

	States iNextState;					//	Used for stEscapeQuote
	bool bBlankLine;					//	Used for stParagraph
	int iNesting;						//	Used for stStartCode

	while (iState != stDone && iState != stDoneKeepNextChar)
		{
		//	Out of input: return whatever token type we have so far

		if (m_pPos == m_pEndPos)
			break;

		switch (iState)
			{
			case stStart:
				{
				switch (*m_pPos)
					{
					//	Swallow whitespace

					case ' ':
					case '\t':
					case '\r':
					case '\n':
						break;

					//	Comment

					case '/':
						iState = stCheckForComment;
						break;

					//	Quoted string

					case '\"':
						pStart = m_pPos + 1;
						iState = stQuotedString;
						break;

					//	Line block

					case '|':
						pStart = m_pPos + 1;
						iState = stLineBlock;
						break;

					//	Paragraph

					case '¶':
						pStart = m_pPos + 1;
						bBlankLine = true;
						iState = stParagraph;
						break;

					//	Symbols

					case '*':
						m_sToken = CONSTLIT("*");
						m_iToken = tkStar;
						iState = stDone;
						break;

					case ':':
						iState = stCheckForDoubleColon;
						break;

					case ';':
						m_sToken = CONSTLIT(";");
						m_iToken = tkSemiColon;
						iState = stDone;
						break;

					case '(':
						m_sToken = CONSTLIT("(");
						m_iToken = tkLeftParen;
						iState = stDone;
						break;

					case ')':
						m_sToken = CONSTLIT(")");
						m_iToken = tkRightParen;
						iState = stDone;
						break;

					case '[':
						m_sToken = CONSTLIT("[");
						m_iToken = tkLeftBracket;
						iState = stDone;
						break;

					case ']':
						m_sToken = CONSTLIT("]");
						m_iToken = tkRightBracket;
						iState = stDone;
						break;

					case '{':
						m_sToken = CONSTLIT("{");
						m_iToken = tkLeftBrace;
						iState = stDone;
						break;

					case '}':
						m_sToken = CONSTLIT("}");
						m_iToken = tkRightBrace;
						iState = stDone;
						break;

					case '=':
						m_sToken = CONSTLIT("=");
						m_iToken = tkEquals;
						iState = stDone;
						break;

					case '>':
						m_sToken = CONSTLIT(">");
						m_iToken = tkGreaterThan;
						iState = stDone;
						break;

					case '<':
						m_sToken = CONSTLIT("<");
						m_iToken = tkLessThan;
						iState = stDone;
						break;

					case ',':
						m_sToken = CONSTLIT(",");
						m_iToken = tkComma;
						iState = stDone;
						break;

					case '!':
						m_sToken = CONSTLIT("!");
						m_iToken = tkBang;
						iState = stDone;
						break;

					//	Either a negative number or a lone '-' symbol. We
					//	remember the position of the sign so that it is
					//	included in the integer text.

					case '-':
						pStart = m_pPos;
						iState = stCheckForNegativeNumber;
						break;

					//	Digits start an integer; identifier characters start
					//	an identifier; anything else is returned as a
					//	single-character symbol.

					default:
						if (IsDigit(*m_pPos))
							{
							pStart = m_pPos;
							iState = stInteger;
							}
						else if (IsIdentifierChar(*m_pPos))
							{
							pStart = m_pPos;
							iState = stIdentifier;
							}
						else
							{
							m_sToken = CString(m_pPos, 1);
							m_iToken = tkOtherSymbol;
							iState = stDone;
							}
						break;
					}
				break;
				}

			case stCheckForNegativeNumber:
				{
				//	We've seen a '-'. If a digit follows, this is a negative
				//	integer (pStart already points at the sign); otherwise
				//	the '-' is a symbol by itself and the current character
				//	belongs to the next token.

				if (IsDigit(*m_pPos))
					iState = stInteger;
				else
					{
					m_sToken = CONSTLIT("-");
					m_iToken = tkOtherSymbol;
					iState = stDoneKeepNextChar;
					}

				//	This break is required: without it we would fall through
				//	to stCheckForDoubleColon and overwrite the result with a
				//	colon token.

				break;
				}

			case stCheckForDoubleColon:
				{
				//	We've seen a ':'. A second ':' makes it a double colon.

				if (*m_pPos == ':')
					{
					m_sToken = CONSTLIT("::");
					m_iToken = tkDoubleColon;
					iState = stDone;
					}
				else
					{
					m_sToken = CONSTLIT(":");
					m_iToken = tkColon;
					iState = stDoneKeepNextChar;
					}
				break;
				}

			case stCheckForComment:
				{
				//	We've seen a '/'. Decide between a line comment, a block
				//	comment, or a plain slash symbol.

				switch (*m_pPos)
					{
					case '/':
						iState = stLineComment;
						break;

					case '*':
						iState = stBlockComment;
						break;

					default:
						{
						m_sToken = CONSTLIT("/");
						m_iToken = tkSlash;
						iState = stDoneKeepNextChar;
						}
					}
				break;
				}

			case stInteger:
				{
				//	Keep consuming digits. An 'x' or 'X' anywhere in the
				//	digit run switches to hex scanning. Any other character
				//	ends the integer.

				if (IsDigit(*m_pPos))
					;
				else if (*m_pPos == 'x' || *m_pPos == 'X')
					iState = stHexInteger;
				else
					{
					m_sToken.Append(CString(pStart, m_pPos - pStart));
					m_iToken = tkInteger;
					iState = stDoneKeepNextChar;
					}
				break;
				}

			case stHexInteger:
				{
				//	Keep consuming hex digits; anything else ends the token.
				//	Hex literals are reported as tkInteger with the raw text.

				if (IsDigit(*m_pPos)
						|| (*m_pPos >= 'A' && *m_pPos <= 'F')
						|| (*m_pPos >= 'a' && *m_pPos <= 'f'))
					;
				else
					{
					m_sToken.Append(CString(pStart, m_pPos - pStart));
					m_iToken = tkInteger;
					iState = stDoneKeepNextChar;
					}
				break;
				}

			case stLineComment:
				{
				//	Skip everything up to and including the newline, then
				//	start looking for a token again.

				switch (*m_pPos)
					{
					case '\n':
						iState = stStart;
						break;
					}
				break;
				}

			case stBlockComment:
				{
				//	Skip until we see a '*' that might begin the terminator

				switch (*m_pPos)
					{
					case '*':
						iState = stCheckForEndBlockComment;
						break;
					}
				break;
				}

			case stCheckForEndBlockComment:
				{
				switch (*m_pPos)
					{
					case '/':
						iState = stStart;
						break;

					//	Another '*' may still be followed by '/', so stay here

					case '*':
						break;

					default:
						iState = stBlockComment;
						break;
					}
				break;
				}

			case stIdentifier:
				{
				//	If we're at the end, return it

				if (!IsIdentifierChar(*m_pPos))
					{
					m_sToken = CString(pStart, m_pPos - pStart);
					m_iToken = tkIdentifier;
					iState = stDoneKeepNextChar;
					}
				break;
				}

			case stQuotedString:
				{
				//	pStart marks the beginning of the run of characters that
				//	has not yet been appended to m_sToken.

				switch (*m_pPos)
					{
					case '\"':
						{
						m_sToken.Append(CString(pStart, m_pPos - pStart));
						m_iToken = tkString;
						iState = stDone;
						break;
						}

					//	Flush the pending run and let stEscapeQuote take the
					//	next character literally.

					case '\\':
						m_sToken.Append(CString(pStart, m_pPos - pStart));
						iState = stEscapeQuote;
						iNextState = stQuotedString;
						break;
					}
				break;
				}

			case stEscapeQuote:
				{
				//	The character after a backslash is appended as-is, and
				//	we return to whichever state saw the backslash.

				m_sToken.Append(CString(m_pPos, 1));
				pStart = m_pPos+1;
				iState = iNextState;
				break;
				}

			case stLineBlock:
				{
				//	A line block line runs to the end of the line

				switch (*m_pPos)
					{
					case '\n':
					case '\r':
						m_sToken.Append(CString(pStart, m_pPos - pStart));
						iState = stCheckForEndLineBlock;
						break;

					case '\\':
						m_sToken.Append(CString(pStart, m_pPos - pStart));
						iState = stEscapeQuote;
						iNextState = stLineBlock;
						break;
					}
				break;
				}

			case stCheckForEndLineBlock:
				{
				//	After the end of a line: skip whitespace. If the next
				//	thing is another '|' the block continues on a new line;
				//	otherwise the block is complete.

				switch (*m_pPos)
					{
					case ' ':
					case '\t':
					case '\r':
					case '\n':
						break;

					case '|':
						m_sToken.Append(CONSTLIT("\n"));
						pStart = m_pPos + 1;
						iState = stLineBlock;
						break;

					default:
						{
						m_iToken = tkLineBlock;
						iState = stDoneKeepNextChar;
						}
					}
				break;
				}

			case stParagraph:
				{
				//	bBlankLine stays true only if the line ends before any
				//	ordinary character is seen.

				switch (*m_pPos)
					{
					case '\n':
					case '\r':
						m_sToken.Append(CString(pStart, m_pPos - pStart));
						iState = stCheckForEndParagraph;
						break;

					case '\\':
						m_sToken.Append(CString(pStart, m_pPos - pStart));
						iState = stEscapeQuote;
						iNextState = stParagraph;
						break;

					default:
						bBlankLine = false;
					}
				break;
				}

			case stCheckForEndParagraph:
				{
				//	After the end of a line: skip whitespace. A '|' continues
				//	the paragraph: a blank line becomes a newline in the
				//	output, while a non-blank line is joined with a space.
				//	NOTE(review): continuation uses '|' even though the
				//	paragraph is opened with a pilcrow -- confirm intended.

				switch (*m_pPos)
					{
					case ' ':
					case '\t':
					case '\r':
					case '\n':
						break;

					case '|':
						if (bBlankLine)
							m_sToken.Append(CONSTLIT("\n"));
						else
							m_sToken.Append(CONSTLIT(" "));

						bBlankLine = true;
						pStart = m_pPos + 1;
						iState = stParagraph;
						break;

					default:
						{
						m_iToken = tkParagraph;
						iState = stDoneKeepNextChar;
						}
					}
				break;
				}

			case stStartCode:
				{
				switch (*m_pPos)
					{
					//	If this is a close brace then we are done

					case '}':
						{
						iState = stDone;
						m_iToken = tkCode;
						m_sToken = CONSTLIT("");
						break;
						}

					//	Otherwise, we keep parsing until our nesting
					//	level is down to 0.

					default:
						{
						pStart = m_pPos;
						iNesting = 1;
						iState = stCode;
						}
					}
				break;
				}

			case stCode:
				{
				//	Track brace nesting; braces inside comments are skipped
				//	by the stCode*Comment states below.
				//	NOTE(review): the character following a lone '/' is
				//	consumed by stCodeCheckForComment without being counted
				//	here, and the first character seen in stStartCode is
				//	likewise not counted.

				switch (*m_pPos)
					{
					case '/':
						{
						iState = stCodeCheckForComment;
						break;
						}

					case '{':
						{
						iNesting++;
						break;
						}

					case '}':
						{
						iNesting--;
						if (iNesting == 0)
							{
							iState = stDone;
							m_iToken = tkCode;
							m_sToken = CString(pStart, m_pPos - pStart);
							}
						break;
						}
					}
				break;
				}

			case stCodeCheckForComment:
				{
				switch (*m_pPos)
					{
					case '/':
						iState = stCodeLineComment;
						break;

					case '*':
						iState = stCodeBlockComment;
						break;

					default:
						{
						iState = stCode;
						}
					}
				break;
				}

			case stCodeLineComment:
				{
				switch (*m_pPos)
					{
					case '\r':
					case '\n':
						iState = stCode;
						break;
					}
				break;
				}

			case stCodeBlockComment:
				{
				switch (*m_pPos)
					{
					case '*':
						iState = stCodeBlockCommentEnd;
						break;
					}
				break;
				}

			case stCodeBlockCommentEnd:
				{
				switch (*m_pPos)
					{
					case '/':
						iState = stCode;
						break;

					//	Another '*' may still be followed by '/', so stay
					//	here (same as stCheckForEndBlockComment). Otherwise
					//	a comment ending in "**/" would never terminate.

					case '*':
						break;

					default:
						iState = stCodeBlockComment;
					}
				break;
				}

			default:
				ASSERT(false);
			}

		//	Consume the current character unless the token ended before it

		if (iState != stDoneKeepNextChar)
			{
			if (*m_pPos == '\n')
				m_iLineNumber++;

			m_pPos++;
			}
		}

	//	Done

	if (retsToken)
		*retsToken = m_sToken;

	return m_iToken;
	}