// Consumes one identifier beginning at p.
//
//   p                 - in/out cursor; advanced past the identifier on success,
//                       left untouched on failure.
//   str               - receives the identifier text on success.
//   pszExtraLeadChars - forwarded to IsIdentifierLeadChar for the first character.
//   pszExtraChars     - forwarded to IsIdentifierChar for the remaining characters.
//
// Returns true if an identifier was consumed, false if p[0] is not accepted
// as a lead character.
bool SIMPLEAPI SkipIdentifier(const wchar_t*& p, CUniString& str, const wchar_t* pszExtraLeadChars, const wchar_t* pszExtraChars)
{
	// Bail out before touching anything if the first character cannot start an identifier
	if (!IsIdentifierLeadChar(p[0], pszExtraLeadChars))
		return false;

	// Remember where the identifier begins, then step over the lead character
	const wchar_t* const pszStart=p;
	++p;

	// Step over every following identifier character
	while (IsIdentifierChar(p[0], pszExtraChars))
		++p;

	// Hand back exactly the characters that were consumed
	str=CUniString(pszStart, int(p-pszStart));
	return true;
}
// Checks if we're at label. // Returns the name if so and an empty string if not. std::string SymFile::GetLabel(bool requireColon) { long start = m_pos; long pos = m_pos; if (IsIdentifierStartingChar(m_buffer[pos])) { pos++; while (IsIdentifierChar(m_buffer[pos])) pos++; } if (requireColon) { if (m_buffer[pos] == ':') { if (pos != start) m_pos = pos + 1; } else { pos = start; } } else { m_pos = pos; } return std::string(&m_buffer[start], pos - start); }
// Case-insensitive test for whether the text at p starts with psz.
//
//   p        - text to examine.
//   psz      - text to look for; a null pointer never matches.
//   bEndWord - when true, the character following the match must not be an
//              identifier character.
//
// Returns true on a match; p is never modified.
bool SIMPLEAPI DoesMatchI(const wchar_t* p, const wchar_t* psz, bool bEndWord)
{
	if (!psz)
		return false;

	const size_t cchMatch=wcslen(psz);

	// The leading characters must compare equal, ignoring case
	if (_wcsnicmp(p, psz, cchMatch)!=0)
		return false;

	// Optionally insist that the match ends on a word boundary
	if (bEndWord && IsIdentifierChar(p[cchMatch]))
		return false;

	return true;
}
// Case-sensitive test for whether the text at p starts with psz; on a match
// the cursor is advanced past the matched text.
//
//   p        - in/out cursor; advanced by the length of psz on success only.
//   psz      - text to look for; a null pointer never matches.
//   bEndWord - when true, the character following the match must not be an
//              identifier character.
//
// Returns true if the text matched and was skipped.
bool SIMPLEAPI SkipMatch(const wchar_t*& p, const wchar_t* psz, bool bEndWord)
{
	if (!psz)
		return false;

	const size_t cchMatch=wcslen(psz);

	// The leading characters must compare equal
	if (wcsncmp(p, psz, cchMatch)!=0)
		return false;

	// Optionally insist that the match ends on a word boundary
	if (bEndWord && IsIdentifierChar(p[cchMatch]))
		return false;

	p+=cchMatch;
	return true;
}
void CFile::TryConvertString() { long oldPos = m_pos; long oldLineNum = m_lineNum; bool noTerminator = false; if (m_buffer[m_pos] != '_' || (m_pos > 0 && IsIdentifierChar(m_buffer[m_pos - 1]))) return; m_pos++; if (m_buffer[m_pos] == '_') { noTerminator = true; m_pos++; } SkipWhitespace(); if (m_buffer[m_pos] != '(') { m_pos = oldPos; m_lineNum = oldLineNum; return; } m_pos++; SkipWhitespace(); std::printf("{ "); while (1) { SkipWhitespace(); if (m_buffer[m_pos] == '"') { unsigned char s[kMaxStringLength]; int length; StringParser stringParser(m_buffer, m_size); try { m_pos += stringParser.ParseString(m_pos, s, length); } catch (std::runtime_error& e) { RaiseError(e.what()); } for (int i = 0; i < length; i++) printf("0x%02X, ", s[i]); } else if (m_buffer[m_pos] == ')') { m_pos++; break; } else { if (m_pos >= m_size) RaiseError("unexpected EOF"); if (IsAsciiPrintable(m_buffer[m_pos])) RaiseError("unexpected character '%c'", m_buffer[m_pos]); else RaiseError("unexpected character '\\x%02X'", m_buffer[m_pos]); } } if (noTerminator) std::printf(" }"); else std::printf("0xFF }"); }
// Scans forward from m_pszCurrent until one token has been produced, fills in
// *pToken and returns its token id.
//
// Whitespace and line ends never set pToken->iUserByte, so the loop simply
// continues past them (line ends go through TrackLine). Comments only produce
// tokens when RepresentNoiseTokens() is true; otherwise they are skipped the
// same way. Tokens whose text/length cannot be derived from the token id
// (string/char/verbatim literals, numbers, doc comments, multi-line comments,
// escaped identifiers) get extra storage from TokenMemAlloc and are flagged
// with TF_OVERHEAD.
//
// On normal exit m_pszCurrent is advanced past the token, m_fFirstOnLine is
// cleared and m_fTokensSeen is updated. If the identifier string builder
// reports a failure, m_hr is set and TID_INVALID is returned immediately
// without updating m_pszCurrent.
TOKENID CLexer::ScanToken (CSTOKEN *pToken)
{
    WCHAR       ch, chQuote, chSurrogate = L'\0';
    PCWSTR      p = m_pszCurrent, pszHold = NULL, pszToken;
    BOOL        fReal = FALSE, fEscaped = FALSE, fAtPrefix = FALSE;

    // Initialize for new token scan
    pToken->iChar = pToken->iLine = 0;
    pToken->iUserByte = TID_INVALID;
    pToken->iUserBits = 0;

    // Start scanning the token
    while (pToken->iUserByte == TID_INVALID)
    {
        if (!PositionOf (p, pToken) && !m_fThisLineTooLong)
        {
            ErrorAtPosition (m_iCurLine, MAX_POS_LINE_LEN - 1, 1, ERR_LineTooLong, MAX_POS_LINE_LEN);
            m_fLimitExceeded = TRUE;
            m_fThisLineTooLong = TRUE;
        }

        pszToken = p;

        switch (ch = *p++)
        {
            case 0:
            {
                // Back up to point to the 0 again...
                p--;
                pToken->iUserByte = TID_ENDFILE;
                pToken->iLength = 0;
                break;
            }

            case '\t':
            case ' ':
            {
                // Tabs and spaces tend to roam in groups... scan them together
                while (*p == ' ' || *p == '\t')
                    p++;
                break;
            }

            case UCH_PS:
            case UCH_LS:
            case 0x0085:
            case '\n':
            {
                // This is a new line
                TrackLine (p);
                break;
            }

            case '\r':
            {
                // Bare CR's are lines, but CRLF pairs are considered a single line.
                if (*p == '\n')
                    p++;
                TrackLine (p);
                break;
            }

            // Other Whitespace characters
            case UCH_BOM:   // Unicode Byte-order marker
            case 0x001A:    // Ctrl+Z
            case '\v':      // Vertical Tab
            case '\f':      // Form-feed
            {
                break;
            }

            case '#':
            {
                p--;
                if (!ScanPreprocessorLine (p))
                {
                    ASSERT(!m_fPreproc);
                    p++;
                    ReportInvalidToken(pToken, pszToken, p);
                }
                break;
            }

            case '\"':
            case '\'':
            {
                CStringBuilder  sb;

                // "Normal" strings (double-quoted and single-quoted (char) literals).  We translate escape sequences
                // here, and construct the STRCONST (for strings) directly (char literals are encoded w/o overhead)
                chQuote = ch;
                while (*p != chQuote)
                {
                    WCHAR   c = *p++;

                    if (c == '\\')
                    {
                        WCHAR c2 = 0;
                        c = ScanEscapeSequence (p, &c2);

                        // We use a string building to construct the string constant's value.  Yes, CStringBuilder
                        // is equipped to deal with embedded nul characters.
                        sb.Append (c);
                        if (c2 != 0)
                            sb.Append (c2);
                    }
                    else if (IsEndOfLineChar (c) || c == 0)
                    {
                        // Unterminated literal: back up so the line end / nul is rescanned.
                        ASSERT (p > pszToken);
                        p--;
                        ErrorAtPosition (m_iCurLine, (long)(pszToken - m_pszCurLine), (long)(p - pszToken), ERR_NewlineInConst);
                        pToken->iUserBits |= TF_UNTERMINATED;
                        break;
                    }
                    else
                    {
                        // We use a string building to construct the string constant's value.  Yes, CStringBuilder
                        // is equipped to deal with embedded nul characters.
                        sb.Append (c);
                    }
                }

                // Skip the terminating quote (if present)
                if ((pToken->iUserBits & TF_UNTERMINATED) == 0)
                    p++;

                if (chQuote == '\'')
                {
                    // This was a char literal -- no need to allocate overhead...
                    if (sb.GetLength() != 1)
                        ErrorAtPosition (m_iCurLine, (long)(pszToken - m_pszCurLine), (long)(p - pszToken), (sb.GetLength() != 0) ? ERR_TooManyCharsInConst : ERR_EmptyCharConst);

                    pToken->iUserByte = TID_CHARLIT;
                    pToken->chr.cCharValue = ((PCWSTR)sb)[0];
                    pToken->chr.iCharLen = (WCHAR)(p - pszToken);
                }
                else
                {
                    // This one requires special allocation.
                    pToken->iUserByte = TID_STRINGLIT;
                    pToken->iUserBits |= TF_OVERHEAD;
                    pToken->pStringLiteral = (STRLITERAL *)TokenMemAlloc (pToken, sizeof (STRLITERAL) + (sb.GetLength() * sizeof (WCHAR)));
                    pToken->pStringLiteral->iSourceLength = (long)(p - pszToken);
                    pToken->pStringLiteral->str.length = (long)sb.GetLength();
                    pToken->pStringLiteral->str.text = (WCHAR *)(pToken->pStringLiteral + 1);
                    memcpy (pToken->pStringLiteral->str.text, (PCWSTR)sb, pToken->pStringLiteral->str.length * sizeof (WCHAR));
                }
                break;
            }

            case '/':
            {
                // Lotsa things start with slash...
                switch (*p)
                {
                    case '/':
                    {
                        // Single-line comments...
                        bool    fDocComment = (p[1] == '/' && p[2] != '/');

                        // Find the end of the line, and make sure it's not too long (even for non-doc comments...)
                        while (*p != 0 && !IsEndOfLineChar (*p))
                        {
                            if (p - m_pszCurLine >= MAX_POS_LINE_LEN && !m_fThisLineTooLong)
                            {
                                ErrorAtPosition (m_iCurLine, MAX_POS_LINE_LEN - 1, 1, ERR_LineTooLong, MAX_POS_LINE_LEN);
                                m_fLimitExceeded = TRUE;
                                m_fThisLineTooLong = TRUE;
                            }

                            p++;
                        }

                        // Only put comments in the token stream if asked
                        if (RepresentNoiseTokens ())
                        {
                            if (fDocComment)
                            {
                                size_t cchToken = (p - pszToken);
                                size_t cchBuffer = cchToken + 1;
                                size_t cbBuffer = cchBuffer * sizeof(WCHAR);

                                // Doc comments require, ironically enough, overhead in the token stream.
                                pToken->iUserByte = TID_DOCCOMMENT;
                                pToken->iUserBits |= TF_OVERHEAD;
                                pToken->pDocLiteral = (DOCLITERAL *)TokenMemAlloc (pToken, sizeof (DOCLITERAL) + cbBuffer);
                                pToken->pDocLiteral->posEnd = POSDATA(m_iCurLine, (long)(p - m_pszCurLine));
                                wcsncpy_s (pToken->pDocLiteral->szText, cchBuffer, pszToken, cchToken);
                                pToken->pDocLiteral->szText[cchToken] = 0;
                            }
                            else
                            {
                                // No overhead incurred for single-line non-doc comments, but we do need the length.
                                pToken->iUserByte = TID_SLCOMMENT;
                                pToken->iLength = (long)(p - pszToken);
                            }
                        }
                        break;
                    }

                    case '*':
                    {
                        bool    fDocComment = (p[1] == '*' && p[2] != '*');
                        BOOL    fDone = FALSE;

                        // Multi-line comments...
                        p++;
                        while (!fDone)
                        {
                            if (*p == 0)
                            {
                                // The comment didn't end.  Report an error at the start point.
                                ErrorAtPosition (pToken->iLine, pToken->iChar, 2, ERR_OpenEndedComment);
                                if (RepresentNoiseTokens ())
                                    pToken->iUserBits |= TF_UNTERMINATED;
                                fDone = TRUE;
                                break;
                            }

                            if (*p == '*' && p[1] == '/')
                            {
                                p += 2;
                                break;
                            }

                            if (IsEndOfLineChar (*p))
                            {
                                // Treat CRLF as a single line end
                                if (*p == '\r' && p[1] == '\n')
                                    p++;
                                TrackLine (++p);
                            }
                            else
                            {
                                p++;
                            }
                        }

                        m_fFirstOnLine = FALSE;

                        if (RepresentNoiseTokens ())
                        {
                            pToken->iUserBits |= TF_OVERHEAD;
                            if (fDocComment)
                            {
                                // Doc comments require, ironically enough, overhead in the token stream.
                                size_t cchToken = (p - pszToken);
                                size_t cchBuffer = cchToken + 1; //+1 for null
                                size_t cbBuffer = cchBuffer * sizeof(WCHAR);

                                pToken->iUserByte = TID_MLDOCCOMMENT;
                                pToken->pDocLiteral = (DOCLITERAL *)TokenMemAlloc (pToken, sizeof (DOCLITERAL) + cbBuffer);
                                pToken->pDocLiteral->posEnd = POSDATA(m_iCurLine, (long)(p - m_pszCurLine));
                                wcsncpy_s (pToken->pDocLiteral->szText, cchBuffer, pszToken, cchToken);
                                pToken->pDocLiteral->szText[cchToken] = 0;

                                if (p - m_pszCurLine >= MAX_POS_LINE_LEN && !m_fThisLineTooLong)
                                {
                                    ErrorAtPosition (m_iCurLine, MAX_POS_LINE_LEN - 1, 1, ERR_LineTooLong, MAX_POS_LINE_LEN);
                                    m_fLimitExceeded = TRUE;
                                    m_fThisLineTooLong = TRUE;
                                }
                            }
                            else
                            {
                                // For multi-line comments, we don't put the text in but we do need the
                                // end position -- which means ML comments incur overhead...  :-(
                                pToken->iUserByte = TID_MLCOMMENT;
                                pToken->pposEnd = (POSDATA *)TokenMemAlloc (pToken, sizeof (POSDATA));
                                if (!PositionOf (p, pToken->pposEnd) && !m_fThisLineTooLong)
                                {
                                    ErrorAtPosition (m_iCurLine, MAX_POS_LINE_LEN - 1, 1, ERR_LineTooLong, MAX_POS_LINE_LEN);
                                    m_fLimitExceeded = TRUE;
                                    m_fThisLineTooLong = TRUE;
                                }
                            }
                        }
                        break;
                    }

                    case '=':
                    {
                        p++;
                        pToken->iUserByte = TID_SLASHEQUAL;
                        pToken->iLength = 2;
                        break;
                    }

                    default:
                    {
                        pToken->iUserByte = TID_SLASH;
                        pToken->iLength = 1;
                        break;
                    }
                }
                break;
            }

            case '.':
            {
                if (*p >= '0' && *p <= '9')
                {
                    // ".5" style real literal: join the number scanner after the decimal point
                    p++;
                    ch = 0;
                    goto _parseNumber;
                }
                pToken->iUserByte = TID_DOT;
                pToken->iLength = 1;
                break;
            }

            case ',':
                pToken->iUserByte = TID_COMMA;
                pToken->iLength = 1;
                break;

            case ':':
                if (*p == ':')
                {
                    pToken->iUserByte = TID_COLONCOLON;
                    pToken->iLength = 2;
                    p++;
                }
                else
                {
                    pToken->iUserByte = TID_COLON;
                    pToken->iLength = 1;
                }
                break;

            case ';':
                pToken->iUserByte = TID_SEMICOLON;
                pToken->iLength = 1;
                break;

            case '~':
                pToken->iUserByte = TID_TILDE;
                pToken->iLength = 1;
                break;

            case '!':
            {
                if (*p == '=')
                {
                    pToken->iUserByte = TID_NOTEQUAL;
                    pToken->iLength = 2;
                    p++;
                }
                else
                {
                    pToken->iUserByte = TID_BANG;
                    pToken->iLength = 1;
                }
                break;
            }

            case '=':
            {
                if (*p == '=')
                {
                    pToken->iUserByte = TID_EQUALEQUAL;
                    pToken->iLength = 2;
                    p++;
                }
                else
                {
                    pToken->iUserByte = TID_EQUAL;
                    pToken->iLength = 1;
                }
                break;
            }

            case '*':
            {
                if (*p == '=')
                {
                    pToken->iUserByte = TID_SPLATEQUAL;
                    pToken->iLength = 2;
                    p++;
                }
                else
                {
                    pToken->iUserByte = TID_STAR;
                    pToken->iLength = 1;
                }
                break;
            }

            case '(':
            {
                pToken->iUserByte = TID_OPENPAREN;
                pToken->iLength = 1;
                break;
            }

            case ')':
            {
                pToken->iUserByte = TID_CLOSEPAREN;
                pToken->iLength = 1;
                break;
            }

            case '{':
            {
                pToken->iUserByte = TID_OPENCURLY;
                pToken->iLength = 1;
                break;
            }

            case '}':
            {
                pToken->iUserByte = TID_CLOSECURLY;
                pToken->iLength = 1;
                break;
            }

            case '[':
            {
                pToken->iUserByte = TID_OPENSQUARE;
                pToken->iLength = 1;
                break;
            }

            case ']':
            {
                pToken->iUserByte = TID_CLOSESQUARE;
                pToken->iLength = 1;
                break;
            }

            case '?':
            {
                if (*p == '?')
                {
                    p++;
                    pToken->iUserByte = TID_QUESTQUEST;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_QUESTION;
                    pToken->iLength = 1;
                }
                break;
            }

            case '+':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_PLUSEQUAL;
                    pToken->iLength = 2;
                }
                else if (*p == '+')
                {
                    p++;
                    pToken->iUserByte = TID_PLUSPLUS;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_PLUS;
                    pToken->iLength = 1;
                }
                break;
            }

            case '-':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_MINUSEQUAL;
                    pToken->iLength = 2;
                }
                else if (*p == '-')
                {
                    p++;
                    pToken->iUserByte = TID_MINUSMINUS;
                    pToken->iLength = 2;
                }
                else if (*p == '>')
                {
                    p++;
                    pToken->iUserByte = TID_ARROW;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_MINUS;
                    pToken->iLength = 1;
                }
                break;
            }

            case '%':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_MODEQUAL;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_PERCENT;
                    pToken->iLength = 1;
                }
                break;
            }

            case '&':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_ANDEQUAL;
                    pToken->iLength = 2;
                }
                else if (*p == '&')
                {
                    p++;
                    pToken->iUserByte = TID_LOG_AND;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_AMPERSAND;
                    pToken->iLength = 1;
                }
                break;
            }

            case '^':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_HATEQUAL;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_HAT;
                    pToken->iLength = 1;
                }
                break;
            }

            case '|':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_BAREQUAL;
                    pToken->iLength = 2;
                }
                else if (*p == '|')
                {
                    p++;
                    pToken->iUserByte = TID_LOG_OR;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_BAR;
                    pToken->iLength = 1;
                }
                break;
            }

            case '<':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_LESSEQUAL;
                    pToken->iLength = 2;
                }
                else if (*p == '<')
                {
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        pToken->iUserByte = TID_SHIFTLEFTEQ;
                        pToken->iLength = 3;
                    }
                    else
                    {
                        pToken->iUserByte = TID_SHIFTLEFT;
                        pToken->iLength = 2;
                    }
                }
                else
                {
                    pToken->iUserByte = TID_LESS;
                    pToken->iLength = 1;
                }
                break;
            }

            case '>':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_GREATEREQUAL;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_GREATER;
                    pToken->iLength = 1;
                }
                break;
            }

            case '@':
            {
                if (*p == '"')
                {
                    CStringBuilder  sb;
                    BOOL            fDone = FALSE;
                    WCHAR           c;

                    // Verbatim string literal.  While scanning/accumulating its value into
                    // the string builder, track lines and ignore escape characters (they don't
                    // apply in VSL's) -- watch for double-quotes as well.
                    p++;
                    while (!fDone)
                    {
                        switch (c = *p++)
                        {
                            case UCH_PS:
                            case UCH_LS:
                            case 0x0085:
                            case '\n':
                            {
                                TrackLine (p);
                                break;
                            }

                            case '\r':
                            {
                                if (*p == '\n')
                                {
                                    // Keep both characters of a CRLF pair in the literal's value
                                    sb.Append (c);
                                    c = *p++;
                                }
                                TrackLine (p);
                                break;
                            }

                            case '\"':
                            {
                                if (*p == '\"')
                                    p++;            // Doubled quote -- skip & put the single quote in the string
                                else
                                    fDone = TRUE;
                                break;
                            }

                            case 0:
                            {
                                // Reached the end of the source without finding the end-quote.  Give
                                // an error back at the starting point.
                                ErrorAtPosition (pToken->iLine, pToken->iChar, 2, ERR_UnterminatedStringLit);
                                pToken->iUserBits |= TF_UNTERMINATED;
                                fDone = TRUE;
                                p--;
                                break;
                            }

                            default:
                                ASSERT(!IsEndOfLineChar(c));
                                break;
                        }

                        if (!fDone)
                            sb.Append (c);
                    }

                    pToken->iUserByte = TID_VSLITERAL;
                    pToken->iUserBits |= TF_OVERHEAD;
                    pToken->pVSLiteral = (VSLITERAL *)TokenMemAlloc (pToken, sizeof (VSLITERAL) + (sb.GetLength() * sizeof (WCHAR)));
                    PositionOf (p, &pToken->pVSLiteral->posEnd);
                    pToken->pVSLiteral->str.length = (long)sb.GetLength();
                    pToken->pVSLiteral->str.text = (WCHAR *)(pToken->pVSLiteral + 1);
                    memcpy (pToken->pVSLiteral->str.text, (PCWSTR)sb, sb.GetLength() * sizeof (WCHAR));
                    break;
                }

                // Check for identifiers.  NOTE: unicode escapes are allowed here!
                ch = PeekChar(p, &chSurrogate);
                if (!IsIdentifierChar (ch)) // BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                {
                    // After the '@' we have neither an identifier nor a string quote, so emit an
                    // invalid token and report that a verbatim literal was expected.
                    CreateInvalidToken(pToken, pszToken, p);
                    ErrorAtPosition (m_iCurLine, (long)(pszToken - m_pszCurLine), (long)(p - pszToken), ERR_ExpectedVerbatimLiteral);
                    break;
                }

                ch = NextChar(p, &chSurrogate);
                fAtPrefix = TRUE;
                goto _ParseIdentifier;  // (Goto avoids the IsSpaceSeparator() check and the redundant IsIdentifierChar() check below...)
            }

            case '\\':
                // Could be unicode escape. Try that.
                --p;
                ch = NextChar (p, &chSurrogate);

                // If we had a unicode escape, ch is it. If we didn't, ch is still a backslash. Unicode escape
                // must start an identifers, so check only for identifiers now.
                goto _CheckIdentifier;

            default:
                ASSERT(!IsEndOfLineChar(ch));
                if (IsSpaceSeparator (ch))    // Unicode class 'Zs'
                {
                    while (IsSpaceSeparator(*p))
                        p++;
                    break;
                }
_CheckIdentifier:
                if (!IsIdentifierChar (ch)) // BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                {
                    ReportInvalidToken(pToken, pszToken, p);
                    break;
                }
                // Fall through case.  All the 'common' identifier characters are represented directly in
                // these switch cases for optimal perf.  Calling IsIdentifierChar() functions is relatively
                // expensive.

            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i':
            case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
            case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I':
            case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
            case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
            case '_':
_ParseIdentifier:
            {
                CStringBuilder  sb;
                bool            doubleUnderscore = false;

                // Remember, because we're processing identifiers here, unicode escape sequences are
                // allowed and must be handled
                sb.Append (ch);
                if (chSurrogate)
                    sb.Append(chSurrogate);

                do
                {
                    ch = PeekChar (p, &chSurrogate);
                    switch (ch)
                    {
                        case '_':
                            // Common identifier character, but we need check for double consecutive underscores
                            if (!doubleUnderscore && ((PWSTR)sb)[sb.GetLength() - 1] == '_')
                                doubleUnderscore = true;
                            break;

                        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I':
                        case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
                        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
                        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i':
                        case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
                        case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
                        case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8':
                        case '9':
                        {
                            // Again, these are the 'common' identifier characters...
                            break;
                        }

                        case ' ': case '\t': case '.': case ';': case '(': case ')': case ',':
                        {
                            // ...and these are the 'common' stop characters.
                            goto LoopExit;
                        }

                        default:
                        {
                            // This is the 'expensive' call
                            if (IsIdentifierCharOrDigit (ch)) // BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                            {
                                if (IsOtherFormat (ch))
                                {
                                    goto SkipChar; // Ignore formatting characters
                                }
                            }
                            else
                            {
                                // Not a valid identifier character, so bail.
                                goto LoopExit;
                            }
                        }
                    }

                    sb.Append (ch);
                    if (chSurrogate)
                        sb.Append(chSurrogate);

SkipChar:
                    ch = NextChar (p, &chSurrogate);
                }
                while (ch);

LoopExit:
                HRESULT hr;
                if (!SUCCEEDED(hr = sb.GetResultCode()))
                {
                    m_hr = hr;
                    return TID_INVALID;
                }

                PCWSTR  pszName = sb;
                long    iLength = (long)sb.GetLength();

                // "escaped" means there was an @ prefix, or there was a unicode escape -- both of which
                // indicate overhead, since the identifier length will not be equal to the token length
                fEscaped = (fAtPrefix || (p - pszToken > iLength));

                if (sb.GetLength() >= MAX_IDENT_SIZE)
                {
                    ErrorAtPosition (m_iCurLine, (long)(pszToken - m_pszCurLine), (long)(p - pszToken), ERR_IdentifierTooLong);
                    iLength = MAX_IDENT_SIZE - 1;
                }

                int     iKeyword;

                // Add the identifier to the name table
                pToken->pName = m_pNameMgr->AddString (pszName, iLength);

                // ...and check to see if it is a keyword, if appropriate
                if (fEscaped || !m_pNameMgr->IsNameKeyword (pToken->pName, m_eKeywordMode, &iKeyword))
                {
                    pToken->iUserByte = TID_IDENTIFIER;

                    if (doubleUnderscore && !fAtPrefix && m_eKeywordMode == CompatibilityECMA1)
                    {
                        ErrorAtPosition (m_iCurLine, (long)(pszToken - m_pszCurLine), (long)(p - pszToken), ERR_ReservedIdentifier, pToken->pName->text);
                    }

                    if (fEscaped)
                    {
                        NAME    *pName = pToken->pName;     // Hold this so assignment to pEscName doesn't whack it

                        pToken->iUserBits |= TF_OVERHEAD;
                        pToken->pEscName = (ESCAPEDNAME *)TokenMemAlloc (pToken, sizeof (ESCAPEDNAME));
                        pToken->pEscName->iLen = (long)(p - pszToken);
                        pToken->pEscName->pName = pName;
                    }
                }
                else
                {
                    pToken->iUserByte = iKeyword;
                    pToken->iLength = iLength;
                }

                if (fAtPrefix)
                {
                    pToken->iUserBits |= TF_VERBATIMSTRING; // We need to know this later
                }

                break;
            }

            case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
            {
                BOOL fHexNumber;
                if ((fHexNumber = (ch == '0' && (*p == 'x' || *p == 'X'))))
                {
                    // it's a hex constant
                    p++;

                    // It's OK if it has no digits after the '0x' -- we'll catch it in ScanNumericLiteral
                    // and give a proper error then.
                    while (*p <= 'f' && isxdigit (*p))
                        p++;

                    if (*p == 'L' || *p == 'l')
                    {
                        p++;
                        if (*p == 'u' || *p == 'U')
                            p++;
                    }
                    else if (*p == 'u' || *p == 'U')
                    {
                        p++;
                        if (*p == 'L' || *p == 'l')
                            p++;
                    }
                }
                else
                {
                    // skip digits
                    while (*p >= '0' && *p <= '9')
                        p++;

                    if (*p == '.')
                    {
                        pszHold = p++;
                        if (*p >= '0' && *p <= '9')
                        {
                            // skip digits after decimal point
                            p++;
_parseNumber:
                            fHexNumber = false;
                            fReal = TRUE;
                            while (*p >= '0' && *p <= '9')
                                p++;
                        }
                        else
                        {
                            // Number + dot + non-digit -- these are separate tokens, so don't absorb the
                            // dot token into the number.
                            p = pszHold;
                            size_t cchToken = (p - pszToken);
                            size_t cchBuffer = cchToken + 1;
                            size_t cbBuffer = cchBuffer * sizeof(WCHAR);

                            pToken->iUserByte = TID_NUMBER;
                            pToken->iUserBits |= TF_OVERHEAD;
                            pToken->pLiteral = (LITERAL *)TokenMemAlloc (pToken, sizeof (LITERAL) + cbBuffer);
                            pToken->pLiteral->iSourceLength = (long)cchToken;
                            wcsncpy_s (pToken->pLiteral->szText, cchBuffer, pszToken, cchToken);
                            // Terminate at cchToken (the last element of the cchBuffer-sized region).
                            // Indexing with cchBuffer here would write one element past that region.
                            pToken->pLiteral->szText[cchToken] = 0;
                            break;
                        }
                    }

                    if (*p == 'E' || *p == 'e')
                    {
                        fReal = TRUE;

                        // skip exponent
                        p++;
                        if (*p == '+' || *p == '-')
                            p++;

                        while (*p >= '0' && *p <= '9')
                            p++;
                    }

                    if (fReal)
                    {
                        if (*p == 'f' || *p == 'F' || *p == 'D' || *p == 'd' || *p == 'm' || *p == 'M')
                            p++;
                    }
                    else if (*p == 'F' || *p == 'f' || *p == 'D' || *p == 'd' || *p == 'm' || *p == 'M')
                    {
                        p++;
                    }
                    else if (*p == 'L' || *p == 'l')
                    {
                        p++;
                        if (*p == 'u' || *p == 'U')
                            p++;
                    }
                    else if (*p == 'u' || *p == 'U')
                    {
                        p++;
                        if (*p == 'L' || *p == 'l')
                            p++;
                    }
                }

                size_t cchToken = (p - pszToken);
                size_t cchBuffer = cchToken + 1;
                size_t cbBuffer = cchBuffer * sizeof (WCHAR);

                pToken->iUserByte = TID_NUMBER;
                pToken->iUserBits |= TF_OVERHEAD;
                if (fHexNumber)
                    pToken->iUserBits |= TF_HEXLITERAL;
                pToken->pLiteral = (LITERAL *)TokenMemAlloc (pToken, sizeof (LITERAL) + cbBuffer);
                pToken->pLiteral->iSourceLength = (long)(cchToken);
                wcsncpy_s (pToken->pLiteral->szText, cchBuffer, pszToken, cchToken);
                pToken->pLiteral->szText[cchToken] = 0;
                break;
            }
        } // switch
    } // while

    m_pszCurrent = p;
    m_fFirstOnLine = FALSE;
    if (!m_fTokensSeen)
        m_fTokensSeen = ((CParser::m_rgTokenInfo[pToken->Token()].dwFlags & TFF_NOISE) == 0);
    return pToken->Token();
}
BabelTokens CLexer::NextToken (CString *retsToken, Options iMode)

//	NextToken
//
//	Returns the next token in the stream.
//
//	retsToken: if not NULL, receives the text of the token (m_sToken).
//	iMode: when modeCode, scanning starts in the "code" state, which collects
//		raw text up to the brace that closes the current nesting level and
//		returns it as tkCode. Otherwise normal tokenization is used.
//
//	The scanner is a character-at-a-time state machine over [m_pPos, m_pEndPos).
//	States ending in stDone consume the current character; stDoneKeepNextChar
//	leaves it for the next call. m_iLineNumber is incremented for every '\n'
//	consumed.
//
//	If the end of the input is reached before a state finishes, the loop
//	simply stops: m_iToken is still tkEOS unless a state had already set it,
//	so e.g. an identifier or integer that runs up to the very end of the
//	input is reported as tkEOS.

	{
	enum States
		{
		stStart,
		stStartCode,
		stDone,
		stDoneKeepNextChar,
		stCheckForDoubleColon,
		stCheckForComment,
		stIdentifier,
		stLineComment,
		stBlockComment,
		stCheckForEndBlockComment,
		stQuotedString,
		stEscapeQuote,
		stLineBlock,
		stCheckForEndLineBlock,
		stParagraph,
		stCheckForEndParagraph,
		stCheckForNegativeNumber,
		stInteger,
		stHexInteger,
		stCode,
		stCodeCheckForComment,
		stCodeLineComment,
		stCodeBlockComment,
		stCodeBlockCommentEnd,
		};

	//	Start in a different state depending on the mode

	States iState;
	if (iMode == modeCode)
		iState = stStartCode;
	else
		iState = stStart;

	//	Initialize. The scratch variables below are always assigned by the
	//	state that needs them before they are read; they are given defined
	//	values here only so that no path ever reads an indeterminate value.

	char *pStart = NULL;
	m_sToken = NULL;
	m_iToken = tkEOS;
	States iNextState = stStart;	//	Used for stEscapeQuote
	bool bBlankLine = false;		//	Used for stParagraph
	int iNesting = 0;				//	Used for stStartCode

	while (iState != stDone && iState != stDoneKeepNextChar)
		{
		if (m_pPos == m_pEndPos)
			break;

		switch (iState)
			{
			case stStart:
				{
				switch (*m_pPos)
					{
					//	Swallow whitespace

					case ' ':
					case '\t':
					case '\r':
					case '\n':
						break;

					//	Comment

					case '/':
						iState = stCheckForComment;
						break;

					//	Quoted string

					case '\"':
						pStart = m_pPos + 1;
						iState = stQuotedString;
						break;

					//	Line block

					case '|':
						pStart = m_pPos + 1;
						iState = stLineBlock;
						break;

					//	Paragraph

					case '¶':
						pStart = m_pPos + 1;
						bBlankLine = true;
						iState = stParagraph;
						break;

					//	Symbols

					case '*':
						m_sToken = CONSTLIT("*");
						m_iToken = tkStar;
						iState = stDone;
						break;

					case ':':
						iState = stCheckForDoubleColon;
						break;

					case ';':
						m_sToken = CONSTLIT(";");
						m_iToken = tkSemiColon;
						iState = stDone;
						break;

					case '(':
						m_sToken = CONSTLIT("(");
						m_iToken = tkLeftParen;
						iState = stDone;
						break;

					case ')':
						m_sToken = CONSTLIT(")");
						m_iToken = tkRightParen;
						iState = stDone;
						break;

					case '[':
						m_sToken = CONSTLIT("[");
						m_iToken = tkLeftBracket;
						iState = stDone;
						break;

					case ']':
						m_sToken = CONSTLIT("]");
						m_iToken = tkRightBracket;
						iState = stDone;
						break;

					case '{':
						m_sToken = CONSTLIT("{");
						m_iToken = tkLeftBrace;
						iState = stDone;
						break;

					case '}':
						m_sToken = CONSTLIT("}");
						m_iToken = tkRightBrace;
						iState = stDone;
						break;

					case '=':
						m_sToken = CONSTLIT("=");
						m_iToken = tkEquals;
						iState = stDone;
						break;

					case '>':
						m_sToken = CONSTLIT(">");
						m_iToken = tkGreaterThan;
						iState = stDone;
						break;

					case '<':
						m_sToken = CONSTLIT("<");
						m_iToken = tkLessThan;
						iState = stDone;
						break;

					case ',':
						m_sToken = CONSTLIT(",");
						m_iToken = tkComma;
						iState = stDone;
						break;

					case '!':
						m_sToken = CONSTLIT("!");
						m_iToken = tkBang;
						iState = stDone;
						break;

					case '-':
						pStart = m_pPos;
						iState = stCheckForNegativeNumber;
						break;

					default:
						if (IsDigit(*m_pPos))
							{
							pStart = m_pPos;
							iState = stInteger;
							}
						else if (IsIdentifierChar(*m_pPos))
							{
							pStart = m_pPos;
							iState = stIdentifier;
							}
						else
							{
							m_sToken = CString(m_pPos, 1);
							m_iToken = tkOtherSymbol;
							iState = stDone;
							break;
							}
						break;
					}
				break;
				}

			case stCheckForNegativeNumber:
				{
				if (IsDigit(*m_pPos))
					iState = stInteger;
				else
					{
					m_sToken = CONSTLIT("-");
					m_iToken = tkOtherSymbol;
					iState = stDoneKeepNextChar;
					}

				//	Without this break control fell through into
				//	stCheckForDoubleColon, which overwrote the state and
				//	token just chosen (so "-5" came back as a ':' token).

				break;
				}

			case stCheckForDoubleColon:
				{
				if (*m_pPos == ':')
					{
					m_sToken = CONSTLIT("::");
					m_iToken = tkDoubleColon;
					iState = stDone;
					}
				else
					{
					m_sToken = CONSTLIT(":");
					m_iToken = tkColon;
					iState = stDoneKeepNextChar;
					}
				break;
				}

			case stCheckForComment:
				{
				switch (*m_pPos)
					{
					case '/':
						iState = stLineComment;
						break;

					case '*':
						iState = stBlockComment;
						break;

					default:
						{
						m_sToken = CONSTLIT("/");
						m_iToken = tkSlash;
						iState = stDoneKeepNextChar;
						}
					}
				break;
				}

			case stInteger:
				{
				if (IsDigit(*m_pPos))
					;
				else if (*m_pPos == 'x' || *m_pPos == 'X')
					iState = stHexInteger;
				else
					{
					m_sToken.Append(CString(pStart, m_pPos - pStart));
					m_iToken = tkInteger;
					iState = stDoneKeepNextChar;
					}
				break;
				}

			case stHexInteger:
				{
				if (IsDigit(*m_pPos)
						|| (*m_pPos >= 'A' && *m_pPos <= 'F')
						|| (*m_pPos >= 'a' && *m_pPos <= 'f'))
					;
				else
					{
					m_sToken.Append(CString(pStart, m_pPos - pStart));
					m_iToken = tkInteger;
					iState = stDoneKeepNextChar;
					}
				break;
				}

			case stLineComment:
				{
				switch (*m_pPos)
					{
					case '\n':
						iState = stStart;
						break;
					}
				break;
				}

			case stBlockComment:
				{
				switch (*m_pPos)
					{
					case '*':
						iState = stCheckForEndBlockComment;
						break;
					}
				break;
				}

			case stCheckForEndBlockComment:
				{
				switch (*m_pPos)
					{
					case '/':
						iState = stStart;
						break;

					//	Another '*' may still be followed by '/'

					case '*':
						break;

					default:
						iState = stBlockComment;
						break;
					}
				break;
				}

			case stIdentifier:
				{
				//	If we're at the end, return it

				if (!IsIdentifierChar(*m_pPos))
					{
					m_sToken = CString(pStart, m_pPos - pStart);
					m_iToken = tkIdentifier;
					iState = stDoneKeepNextChar;
					}
				break;
				}

			case stQuotedString:
				{
				switch (*m_pPos)
					{
					case '\"':
						{
						m_sToken.Append(CString(pStart, m_pPos - pStart));
						m_iToken = tkString;
						iState = stDone;
						break;
						}

					case '\\':
						m_sToken.Append(CString(pStart, m_pPos - pStart));
						iState = stEscapeQuote;
						iNextState = stQuotedString;
						break;
					}
				break;
				}

			case stEscapeQuote:
				{
				//	Take the escaped character literally, then resume the
				//	state that saw the backslash.

				m_sToken.Append(CString(m_pPos, 1));
				pStart = m_pPos+1;
				iState = iNextState;
				break;
				}

			case stLineBlock:
				{
				switch (*m_pPos)
					{
					case '\n':
					case '\r':
						m_sToken.Append(CString(pStart, m_pPos - pStart));
						iState = stCheckForEndLineBlock;
						break;

					case '\\':
						m_sToken.Append(CString(pStart, m_pPos - pStart));
						iState = stEscapeQuote;
						iNextState = stLineBlock;
						break;
					}
				break;
				}

			case stCheckForEndLineBlock:
				{
				switch (*m_pPos)
					{
					case ' ':
					case '\t':
					case '\r':
					case '\n':
						break;

					//	Another '|' continues the block on a new line

					case '|':
						m_sToken.Append(CONSTLIT("\n"));
						pStart = m_pPos + 1;
						iState = stLineBlock;
						break;

					default:
						{
						m_iToken = tkLineBlock;
						iState = stDoneKeepNextChar;
						}
					}
				break;
				}

			case stParagraph:
				{
				switch (*m_pPos)
					{
					case '\n':
					case '\r':
						m_sToken.Append(CString(pStart, m_pPos - pStart));
						iState = stCheckForEndParagraph;
						break;

					case '\\':
						m_sToken.Append(CString(pStart, m_pPos - pStart));
						iState = stEscapeQuote;
						iNextState = stParagraph;
						break;

					default:
						bBlankLine = false;
					}
				break;
				}

			case stCheckForEndParagraph:
				{
				switch (*m_pPos)
					{
					case ' ':
					case '\t':
					case '\r':
					case '\n':
						break;

					//	A continuation line: a blank previous line becomes a
					//	newline, otherwise the lines are joined with a space.

					case '|':
						if (bBlankLine)
							m_sToken.Append(CONSTLIT("\n"));
						else
							m_sToken.Append(CONSTLIT(" "));

						bBlankLine = true;
						pStart = m_pPos + 1;
						iState = stParagraph;
						break;

					default:
						{
						m_iToken = tkParagraph;
						iState = stDoneKeepNextChar;
						}
					}
				break;
				}

			case stStartCode:
				{
				switch (*m_pPos)
					{
					//	If this is a close brace then we are done

					case '}':
						{
						iState = stDone;
						m_iToken = tkCode;
						m_sToken = CONSTLIT("");
						break;
						}

					//	Otherwise, we keep parsing until our nesting
					//	level is down to 0.

					default:
						{
						pStart = m_pPos;
						iNesting = 1;
						iState = stCode;
						}
					}
				break;
				}

			case stCode:
				{
				switch (*m_pPos)
					{
					case '/':
						{
						iState = stCodeCheckForComment;
						break;
						}

					case '{':
						{
						iNesting++;
						break;
						}

					case '}':
						{
						iNesting--;
						if (iNesting == 0)
							{
							iState = stDone;
							m_iToken = tkCode;
							m_sToken = CString(pStart, m_pPos - pStart);
							}
						break;
						}
					}
				break;
				}

			case stCodeCheckForComment:
				{
				switch (*m_pPos)
					{
					case '/':
						iState = stCodeLineComment;
						break;

					case '*':
						iState = stCodeBlockComment;
						break;

					default:
						{
						iState = stCode;
						}
					}
				break;
				}

			case stCodeLineComment:
				{
				switch (*m_pPos)
					{
					case '\r':
					case '\n':
						iState = stCode;
						break;
					}
				break;
				}

			case stCodeBlockComment:
				{
				switch (*m_pPos)
					{
					case '*':
						iState = stCodeBlockCommentEnd;
						break;
					}
				break;
				}

			case stCodeBlockCommentEnd:
				{
				switch (*m_pPos)
					{
					case '/':
						iState = stCode;
						break;

					default:
						iState = stCodeBlockComment;
					}
				break;
				}

			default:
				ASSERT(false);
			}

		//	Consume the current character unless the state asked to keep it

		if (iState != stDoneKeepNextChar)
			{
			if (*m_pPos == '\n')
				m_iLineNumber++;

			m_pPos++;
			}
		}

	//	Done

	if (retsToken)
		*retsToken = m_sToken;

	return m_iToken;
	}