/**
 * Consumes input for as long as the next character is classified as
 * whitespace by IsWhiteSpace().
 *
 * @param morechars forwarded unchanged to IsWhiteSpace() on every test.
 * @return the first character that failed the whitespace test; it is only
 *         peeked, so it is still the next character to be read.
 */
int InputStream::SkipWhiteSpace(LPCWSTR morechars)
{
	int next = PeekChar();

	while(IsWhiteSpace(next, morechars)) {
		GetChar();
		next = PeekChar();
	}

	return next;
}
/**
 * Copies the raw text that follows the leading character into Token, stopping
 * at end-of-line, at StopChar, or at the start of a line or block comment.
 * Trailing spaces and tabs are dropped from the captured text. Advances the
 * parser's current position.
 *
 * @param	Token		[out] receives the captured text as a constant string
 * @param	StopChar	capture stops when this character is reached
 *
 * @return	true if at least one character remained after trimming
 */
bool FBaseParser::GetRawToken( FToken& Token, TCHAR StopChar /* = TCHAR('\n') */ )
{
	TCHAR Buffer[MAX_STRING_CONST_SIZE];
	int32 NumChars = 0;

	// Begin with the leading character reported by GetLeadingChar().
	for( TCHAR Ch = GetLeadingChar(); !IsEOL(Ch) && Ch != StopChar; Ch = GetChar(true) )
	{
		// A slash followed by '/' or '*' opens a comment, which ends the token.
		if( Ch == '/' && (PeekChar() == '/' || PeekChar() == '*') )
		{
			break;
		}

		Buffer[NumChars] = Ch;
		++NumChars;

		if( NumChars >= MAX_STRING_CONST_SIZE )
		{
			FError::Throwf(TEXT("Identifier exceeds maximum of %i characters"), (int32)MAX_STRING_CONST_SIZE );
		}
	}

	// The character that ended the scan is handed back to the input.
	UngetChar();

	// Strip trailing spaces and tabs.
	while( NumChars > 0 )
	{
		const TCHAR Last = Buffer[NumChars - 1];
		if( Last != ' ' && Last != 9 )
		{
			break;
		}
		--NumChars;
	}
	Buffer[NumChars] = 0;

	Token.SetConstString(Buffer);
	return NumChars > 0;
}
/*
 * fnSetException - parse the remainder of a "set exception" style command.
 *
 * With nothing left on the command line, ListDefaultBreak() is called.
 * Otherwise the next character selects enable ('e') or disable ('d'); any
 * other character is reported through error(SYNTAX). The two characters that
 * follow name the event: "ld" stores the chosen state in fLoadDllBreak. Any
 * other event code is consumed and ignored, as before.
 *
 * The command pointer is never advanced past the terminating NUL, so a
 * missing or one-character event code no longer reads beyond the end of the
 * command buffer.
 */
VOID
fnSetException (
    VOID
    )
{
    UCHAR   ch;
    UCHAR   ch2;
    BOOLEAN fSetException = FALSE;  /* defined value even if error() returns */

    ch = PeekChar();
    ch = (UCHAR)tolower(ch);
    if (ch == '\0') {
        ListDefaultBreak();
        return;
    }

    pchCommand++;
    if (ch == 'e') {
        fSetException = TRUE;
    } else if (ch == 'd') {
        fSetException = FALSE;
    } else {
        error(SYNTAX);
    }

    /* First character of the event code; stop here if the line has ended. */
    ch = PeekChar();
    ch = (UCHAR)tolower(ch);
    if (ch == '\0') {
        return;
    }
    pchCommand++;

    /* Second character of the event code; only step over a real character. */
    ch2 = (UCHAR)tolower(*pchCommand);
    if (ch2 != '\0') {
        pchCommand++;
    }

    if (ch == 'l' && ch2 == 'd') {
        fLoadDllBreak = fSetException;
    }
}
//------------------------------------------------------------------------------ bool FBasicTokenParser::GetRawTokenRespectingQuotes(FBasicToken& Token, TCHAR StopChar/* = TCHAR('\n')*/) { // if the parser is in a bad state, then don't continue parsing (who // knows what will happen!?) if (!IsValid()) { return false; } // Get token after whitespace. TCHAR Temp[MAX_STRING_CONST_SIZE]; int32 Length=0; TCHAR c = GetLeadingChar(); bool bInQuote = false; while( !IsEOL(c) && ((c != StopChar) || bInQuote) ) { if( (c=='/' && PeekChar()=='/') || (c=='/' && PeekChar()=='*') ) { break; } if (c == '"') { bInQuote = !bInQuote; } Temp[Length++] = c; if( Length >= MAX_STRING_CONST_SIZE ) { Length = ((int32)MAX_STRING_CONST_SIZE) - 1; Temp[Length]=0; // needs to happen for the error description below FText ErrorDesc = FText::Format(LOCTEXT("IdTooLong", "Identifer ({0}...) exceeds maximum length of {1}"), FText::FromString(Temp), FText::AsNumber((int32)MAX_STRING_CONST_SIZE)); SetError(FErrorState::ParseError, ErrorDesc); c = GetChar(true); break; } c = GetChar(true); } UngetChar(); // Get rid of trailing whitespace. while( Length>0 && (Temp[Length-1]==' ' || Temp[Length-1]==9 ) ) { Length--; } Temp[Length]=0; if (bInQuote) { FText ErrorDesc = FText::Format(LOCTEXT("NoClosingQuote", "Unterminated quoted string ({0})"), FText::FromString(Temp)); SetError(FErrorState::ParseError, ErrorDesc); } Token.SetConstString(Temp); return Length>0 && IsValid(); }
/**
 * Returns the next character without consuming it, transparently skipping
 * "//" line comments and slash-star block comments.
 *
 * The queue is first filled to exactly two characters so that a comment
 * opener can be recognised. A line comment is discarded up to and including
 * its terminating '\n' (or until EOS); a block comment is discarded up to and
 * including its closing star-slash (or until EOS). After a comment has been
 * skipped the function calls itself again to look at what follows.
 */
int InputStream::PeekChar()
{
	while(m_queue.GetCount() < 2) {
		PushChar();
	}

	ASSERT(m_queue.GetCount() == 2);

	// Anything that does not start with a slash cannot open a comment.
	if(m_queue.GetHead() != '/') {
		return m_queue.GetHead();
	}

	if(m_queue.GetTail() == '/') {
		// Line comment: drop the opener, then discard through end of line.
		while(!m_queue.IsEmpty()) {
			PopChar();
		}

		for(;;) {
			PushChar();
			const int discarded = PopChar();
			if(discarded == '\n' || discarded == EOS) {
				break;
			}
		}

		return PeekChar();
	}

	if(m_queue.GetTail() == '*') {
		// Block comment: drop the opener, then slide a two-character window
		// along the input until it holds the closing pair or EOS is popped.
		while(!m_queue.IsEmpty()) {
			PopChar();
		}

		int popped, pushed;
		PushChar();
		do {
			pushed = PushChar();
			popped = PopChar();
		} while(popped != EOS && !(popped == '*' && pushed == '/'));

		// Discard the second character of the window.
		PopChar();

		return PeekChar();
	}

	return m_queue.GetHead();
}
/**
 * Captures raw text up to end-of-line, a comment opener, or StopChar, except
 * that StopChar is ignored while inside a double-quoted section. Trailing
 * spaces and tabs are dropped from the captured text.
 *
 * Over-long input and an unterminated quote are reported through
 * FError::Throwf().
 *
 * @param	Token		[out] receives the captured text as a constant string
 * @param	StopChar	capture stops at this character when outside quotes
 *
 * @return	true if at least one character remained after trimming
 */
bool FBaseParser::GetRawTokenRespectingQuotes( FToken& Token, TCHAR StopChar /* = TCHAR('\n') */ )
{
	TCHAR Buffer[MAX_STRING_CONST_SIZE];
	int32 NumChars = 0;
	bool bQuoteOpen = false;

	for( TCHAR Ch = GetLeadingChar(); !IsEOL(Ch) && (bQuoteOpen || Ch != StopChar); Ch = GetChar(true) )
	{
		// A slash followed by '/' or '*' opens a comment, which ends the token.
		if( Ch == '/' && (PeekChar() == '/' || PeekChar() == '*') )
		{
			break;
		}

		// Every double quote toggles the "inside a quoted section" state.
		if( Ch == '"' )
		{
			bQuoteOpen = !bQuoteOpen;
		}

		Buffer[NumChars] = Ch;
		++NumChars;

		if( NumChars >= MAX_STRING_CONST_SIZE )
		{
			FError::Throwf(TEXT("Identifier exceeds maximum of %i characters"), (int32)MAX_STRING_CONST_SIZE );

			// Only reached if Throwf returns: consume one more character and clamp.
			Ch = GetChar(true);
			NumChars = ((int32)MAX_STRING_CONST_SIZE) - 1;
			break;
		}
	}

	// Hand the character that ended the scan back to the input.
	UngetChar();

	if( bQuoteOpen )
	{
		FError::Throwf(TEXT("Unterminated quoted string"));
	}

	// Strip trailing spaces and tabs.
	while( NumChars > 0 )
	{
		const TCHAR Last = Buffer[NumChars - 1];
		if( Last != ' ' && Last != 9 )
		{
			break;
		}
		--NumChars;
	}
	Buffer[NumChars] = 0;

	Token.SetConstString(Buffer);

	return NumChars > 0;
}
bool XMLReader::ParseText() { // CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) mValue.clear(); mNodeType = kWhitespace; mCurrentName.Clear(); while (true) { switch (PeekChar()) { case UnicodeChar(0xFFFF): case UnicodeChar('<'): return ! mValue.empty(); case UnicodeChar('&'): ReadChar(); if (! ParseReference(mValue)) return false; break; default: { UnicodeChar c = ReadChar(); if (! IsWhitespace(c)) mNodeType = kText; mValue += c; break; } } } }
// Consumes any run of whitespace at the current position (possibly empty).
// Always returns true.
bool XMLReader::ParseOptionalWhitespace()
{
	for (;;)
	{
		if (! IsWhitespace(PeekChar()))
			break;

		ReadChar();
	}

	return true;
}
// Reads the text of the line that contains the current file position into
// `buffer` and reports the column of that position in `curPosInLine`, then
// restores the file position.
//
// The column is found by stepping backwards one character at a time until a
// '\n' or the start of the file is reached; a tab counts as four columns and
// every other character as one.
//
// Parameters:
//   curPosInLine  [out] column of the original position within its line
//   buffer        [out] receives the line without its trailing newline
//   size          capacity of `buffer` in bytes, including the terminator
//
// Returns false when the stream is already at end-of-file, when `buffer` is
// null or `size` is not positive, or when the line could not be read.
bool Loader::ReadCurrentLine(int &curPosInLine, char *buffer, int size)
{
	if (feof(f))
		return false;

	if (!buffer || size <= 0)
		return false;

	// ftell() returns long; keep the full width for the later fseek().
	const long savedPos = ftell(f);

	curPosInLine = -1;
	char c;
	do
	{
		RewindChar();
		PeekChar(c);

		if (c != '\t')
			curPosInLine++;
		else
			curPosInLine += 4;
	} while (c != '\n' && ftell(f) > 0);

	EatReturns();

	// Read at most `size` bytes: the caller's buffer is only that large.
	const bool gotLine = (fgets(buffer, size, f) != 0);
	if (gotLine)
	{
		// Strip the newline only when fgets actually stored one; an empty
		// string or an unterminated last line must be left untouched.
		const size_t len = strlen(buffer);
		if (len > 0 && buffer[len - 1] == '\n')
			buffer[len - 1] = '\0';
	}
	else
	{
		buffer[0] = '\0';
	}

	fseek(f, savedPos, SEEK_SET);
	return gotLine;
}
/// <summary>Reads the next line, if any</summary> /// <param name="line">The line.</param> /// <returns>True if read, false if EOF</returns> /// <exception cref="Logic::InvalidOperationException">Stream has been closed (reader has been move-copied)</exception> /// <exception cref="Logic::IOException">An I/O error occurred</exception> bool StringReader::ReadLine(wstring& line) { DWORD start = Position, // Start of line end = Length; // End of characters on line WCHAR ch; // EOF: Return false if (IsEOF()) { line.clear(); return false; } // Search for EOF/CRLF/CR while (ReadChar(ch)) { // CR/LF/CRLF: Set of chars marker, if (ch == '\r' || ch == '\n') { // Mark end-of-text end = Position-1; // Consume entire CRLF if present if (ch == '\r' && PeekChar(ch) && ch == '\n') ReadChar(ch); // Position marker now at start of new line, end-of-char marker at last character break; } } // Return line text without CRLF line = wstring(Buffer.get() + start, Buffer.get() + end); return true; }
// Tests whether the next character equals c. On a match mOutputStart is
// advanced by one and true is returned; otherwise nothing changes and false
// is returned.
bool XMLReader::ParseChar(UnicodeChar c)
{
	const bool matched = (PeekChar() == c);

	if (matched)
	{
		++mOutputStart;
	}

	return matched;
}
int nsXFormsXPathScanner::GetOffsetForNonWhite() { PRInt32 co = mOffset + mLength + 1; while (nsXFormsXPathXMLUtil::IsWhitespace(PeekChar(co))) co++; return co; }
// Consumes the next character only if it equals `expect`.
//
// skipWhiteSpace is forwarded to PeekChar(). Returns true when the character
// matched and was skipped, false when the input was left as it was.
bool CObjectIStreamJson::GetChar(char expect, bool skipWhiteSpace /* = false*/)
{
    const bool matched = (PeekChar(skipWhiteSpace) == expect);

    if ( matched ) {
        m_Input.SkipChar();
    }

    return matched;
}
// Requires at least one whitespace character at the current position.
// Returns false without consuming anything when there is none; otherwise
// consumes the whole whitespace run and returns true.
bool XMLReader::ParseRequiredWhitespace()
{
	if (! IsWhitespace(PeekChar()))
	{
		return false;
	}

	ReadChar();
	ParseOptionalWhitespace();
	return true;
}
//------------------------------------------------------------------------------ bool FBasicTokenParser::GetRawToken(FBasicToken& Token, TCHAR StopChar/* = TCHAR('\n')*/) { // if the parser is in a bad state, then don't continue parsing (who // knows what will happen!?) if (!IsValid()) { return false; } // Get token after whitespace. TCHAR Temp[MAX_STRING_CONST_SIZE]; int32 Length=0; TCHAR c = GetLeadingChar(); while( !IsEOL(c) && c != StopChar ) { if( (c=='/' && PeekChar()=='/') || (c=='/' && PeekChar()=='*') ) { break; } Temp[Length++] = c; if( Length >= MAX_STRING_CONST_SIZE ) { Temp[Length] = 0; FText ErrorDesc = FText::Format(LOCTEXT("IdTooLong", "Identifer ({0}...) exceeds maximum length of {1}"), FText::FromString(Temp), FText::AsNumber((int32)MAX_STRING_CONST_SIZE)); SetError(FErrorState::ParseError, ErrorDesc); } c = GetChar(true); } UngetChar(); // Get rid of trailing whitespace. while( Length>0 && (Temp[Length-1]==' ' || Temp[Length-1]==9 ) ) { Length--; } Temp[Length]=0; Token.SetConstString(Temp); return Length>0; }
// Pops one character unconditionally, then keeps popping for as long as the
// next character is whitespace. Always returns WHITESPACE.
nsXFormsXPathScanner::XPATHTOKEN
nsXFormsXPathScanner::ScanWhitespace()
{
  PopChar();

  PRUnichar next = PeekChar();
  while (nsXFormsXPathXMLUtil::IsWhitespace(next)) {
    PopChar();
    next = PeekChar();
  }

  return WHITESPACE;
}
// Decides the pointer kind from the next character, peeked with
// PeekChar(true).
//
// If it is 'n', the data is read and must be exactly "null" (anything else is
// reported as fFormatError) and eNullPointer is returned. Otherwise nothing is
// consumed and eThisPointer is returned.
CObjectIStream::EPointerType CObjectIStreamJson::ReadPointerType(void)
{
    const char next = PeekChar(true);
    if (next != 'n') {
        return eThisPointer;
    }

    const string literal = x_ReadData();
    if (literal != "null") {
        ThrowError(fFormatError, "null expected");
    }
    return eNullPointer;
}
// Scans a literal. The first popped character is taken as the delimiter;
// characters are popped until the same delimiter is peeked again.
// Returns ERRORXPATHTOKEN if a '\0' is peeked before the closing delimiter,
// otherwise pops the closing delimiter and returns LITERAL.
nsXFormsXPathScanner::XPATHTOKEN
nsXFormsXPathScanner::ScanLiteral()
{
  const PRUnichar delimiter = PopChar();

  PRUnichar next = PeekChar();
  while (next != delimiter && next != '\0') {
    PopChar();
    next = PeekChar();
  }

  if (next == '\0') {
    return ERRORXPATHTOKEN;
  }

  PopChar();
  return LITERAL;
}
// Parses a name and stores it in `name`.
//
// The first character is always read. If it cannot start a name the function
// returns false with that character already consumed; `name` is not modified.
//
// Name ::= (Letter | '_' | ':') (NameChar)*
bool XMLReader::ParseName(Name & name)
{
	UnicodeString text;

	const UnicodeChar first = ReadChar();
	const bool validStart = (first == UnicodeChar('_'))
		|| (first == UnicodeChar(':'))
		|| IsLetter(first);

	if (! validStart)
	{
		return false;
	}

	text += first;
	while (IsNameChar(PeekChar()))
	{
		text += ReadChar();
	}

	name.SetName(text);
	return true;
}
// Determines which member of classType comes next in the input, searching
// from index `pos`, and returns its index (or kInvalidMember).
//
// Steps, in order:
//  1. With no rejected tag pending and pos at the first member, an attribute
//     list member is returned immediately with the frame marked "notag".
//  2. If NextElement() reports no further element, the last member is still
//     returned when it is an untagged primitive; otherwise kInvalidMember.
//  3. With no rejected tag pending and the input starting with '[' or '{',
//     the first untagged member at or after pos is returned.
//  4. Otherwise the key is read (a leading '#' is stripped and marks the frame
//     "notag") and looked up with FindDeep(). The member is undone via
//     UndoClassMember() when it was found by deep search or when it holds
//     "any content".
TMemberIndex CObjectIStreamJson::BeginClassMember(const CClassTypeInfo* classType,
                                                  TMemberIndex pos)
{
    const TMemberIndex first = classType->GetMembers().FirstIndex();
    const TMemberIndex last  = classType->GetMembers().LastIndex();

    // 1. attribute list as the very first member
    if (m_RejectedTag.empty() && pos == first &&
        classType->GetMemberInfo(first)->GetId().IsAttlist()) {
        TopFrame().SetNotag();
        return first;
    }

    // 2. no more elements in the input
    if ( !NextElement() ) {
        const bool untaggedPrimitiveTail =
            pos == last &&
            classType->GetMemberInfo(pos)->GetId().HasNotag() &&
            classType->GetMemberInfo(pos)->GetTypeInfo()->GetTypeFamily() == eTypeFamilyPrimitive;
        if (!untaggedPrimitiveTail) {
            return kInvalidMember;
        }
        TopFrame().SetNotag();
        return pos;
    }

    // 3. the value starts directly, without a key
    const char next = PeekChar();
    if (m_RejectedTag.empty() && (next == '[' || next == '{')) {
        for (TMemberIndex idx = pos; idx <= last; ++idx) {
            if (classType->GetMemberInfo(idx)->GetId().HasNotag()) {
                TopFrame().SetNotag();
                return idx;
            }
        }
    }

    // 4. look the member up by its key
    string tagName = ReadKey();
    if (tagName[0] == '#') {
        tagName = tagName.substr(1);
        TopFrame().SetNotag();
    }

    bool deep = false;
    const TMemberIndex found = FindDeep(classType->GetMembers(), tagName, deep);

    if (deep) {
        if (found != kInvalidMember) {
            TopFrame().SetNotag();
        }
        UndoClassMember();
        return found;
    }

    if (found != kInvalidMember &&
        classType->GetMembers().GetItemInfo(found)->GetId().HasAnyContent()) {
        UndoClassMember();
    }
    return found;
}
// Scans an NCName. The first character is popped and must be '_' or a
// letter, otherwise ERRORXPATHTOKEN is returned. All following NCName
// characters are then popped and NCNAME is returned.
nsXFormsXPathScanner::XPATHTOKEN
nsXFormsXPathScanner::ScanNCName()
{
  const PRUnichar first = PopChar();

  if (!(first == '_' || nsXFormsXPathXMLUtil::IsLetter(first))) {
    return ERRORXPATHTOKEN;
  }

  for (;;) {
    if (!nsXFormsXPathXMLUtil::IsNCNameChar(PeekChar())) {
      break;
    }
    PopChar();
  }

  return NCNAME;
}
// Scans a number: digits with at most one decimal point. The first character
// is popped unconditionally (and counts as the decimal point if it is '.').
// Scanning stops, without popping, at the first character that is neither a
// digit nor the first decimal point. Always returns NUMBER.
nsXFormsXPathScanner::XPATHTOKEN
nsXFormsXPathScanner::ScanNumber()
{
  PRUnichar current = PopChar();
  PRBool seenPoint = (current == '.');

  // The loop header pops each character that the body accepted.
  for (; current != '\0'; PopChar()) {
    current = PeekChar();

    if (!seenPoint && current == '.') {
      seenPoint = PR_TRUE;
    } else if (!nsXFormsXPathXMLUtil::IsDigit(current)) {
      break;
    }
  }

  return NUMBER;
}
// Fills `obj` from the input after resetting it.
//
// The key that is read first becomes the object's name. If a '{' follows, the
// block is read as key/value pairs: a key starting with '#' supplies the
// object's value, every other key becomes an attribute. Without a block, the
// single value that follows becomes the object's value.
void CObjectIStreamJson::ReadAnyContentObject(CAnyContentObject& obj)
{
    obj.Reset();

    string name = ReadKey();
    obj.SetName(name);

    string value;
    if (PeekChar(true) != '{') {
        // Plain value, no attribute block.
        value = ReadValue();
        obj.SetValue(value);
        return;
    }

    StartBlock('{');
    while (NextElement()) {
        name = ReadKey();
        value = ReadValue();
        if (name[0] == '#') {
            obj.SetValue(value);
        } else {
            obj.AddAttribute(name,kEmptyStr,value);
        }
    }
    EndBlock('}');
}
// Skips leading blank characters, then reads and returns the following run of
// non-blank characters.
//
//   blanks           the set of characters treated as separators
//   max_word_length  stop once the word has this many characters; a negative
//                    value never matches the length, so it means "no limit"
//   max_read_length  stop once this many characters (blanks included) have
//                    been consumed; a negative value means "no limit"
//
// Reading also stops when PeekChar() reports EOF. The character that ends the
// word is only peeked and stays in the input.
std::string GetWord( const std::string &blanks = " \r\n\t", int max_word_length = -1, int max_read_length = -1 )
{
    // Lookup table indexed by byte value.
    bool is_blank[ 256 ] = { false };
    for( std::string::size_type i = 0; i < blanks.length(); ++i )
    {
        is_blank[ ( unsigned char )blanks[ i ] ] = true;
    }

    std::string result;
    int ch;
    int read_length = 0;

    // Skip leading separators. The peeked value is reduced to a byte before it
    // is used as an index, so a value outside 0..255 (for example a
    // sign-extended char) cannot index outside the table.
    while( read_length != max_read_length
        && ( ch = PeekChar() ) != EOF
        && is_blank[ ( unsigned char )ch ] )
    {
        GetChar();
        ++read_length;
    }

    // Collect the word itself.
    while( ( int )result.length() != max_word_length
        && read_length != max_read_length
        && ( ch = PeekChar() ) != EOF
        && !is_blank[ ( unsigned char )ch ] )
    {
        result.push_back( ( char )GetChar() );
        ++read_length;
    }

    return result;
}
// Skips spaces, tabs and carriage returns. A line feed is not skipped.
// Returns true once a character outside that set has been peeked (it is not
// consumed), and false if PeekChar() or GetChar() fails first.
bool mcMapParser::SkipWhitespace()
{
  char next;

  while( PeekChar(next) )
  {
    const bool isBlank = (next == ' ' || next == '\t' || next == '\r');
    if( !isBlank )
    {
      return true;
    }

    if( !GetChar(next) )
    {
      return false;
    }
  }

  return false;
}
//-----------------------------------------------------------------
//
// Function: fnStartProfilingDLL
//
// Purpose:  Set up the breakpoints for profiling.  The remainder of
//           the command line (the DLL name) is moved to the start
//           of the command buffer and parseExamine() is called on
//           it, i.e. the 'x' command is run on the text entered.
//
// Input:    Profile - pointer to profiling data structure
//                     (not referenced by this routine)
//
// Output:   TRUE  - successful
//           FALSE - unsuccessful - DLL name not entered
//
// Note:     The text used to be staged through a 50-byte local
//           buffer with strcpy(), which overflowed for longer
//           arguments.  A single memmove() gives the same result,
//           has no length limit of its own and is safe when source
//           and destination overlap.
//           NOTE(review): assumes the text at pchCommand fits in
//           chCommand (as it does when pchCommand points into
//           chCommand) - confirm.
//
//------------------------------------------------------------------
BOOLEAN fnStartProfilingDLL (PSProfile *Profile)
{
    UCHAR   ch;

    // Get the DLL name entered
    ch = PeekChar();
    if (ch == '\0') {
        dprintf ("A DLL name must be entered with this command.\n");
        return (FALSE);
    }

    // set command to invoke the 'x' functionality: shift the remaining
    // text, terminator included, to the front of the command buffer
    memmove (&chCommand[0], pchCommand, strlen ((const char *)pchCommand) + 1);
    pchCommand = &chCommand[0];

    // call function to parse the 'x' command
    parseExamine();

    return (TRUE);
}
////////////////////////////////////////////////////////////////////////////////
// CLexer::ScanToken
//
// Scans one token starting at m_pszCurrent, fills in *pToken and returns
// pToken->Token().  The loop keeps going until a case assigns a token id, so
// input that produces no token (blanks, line ends, and comments when
// RepresentNoiseTokens() is false) is skipped.  On exit m_pszCurrent points
// just past the scanned text.
//
// If the identifier string builder reports a failure, the result code is
// stored in m_hr and TID_INVALID is returned immediately.
//
// Tokens whose text must be kept (string, verbatim string and number literals,
// doc comments, escaped identifiers, multi-line comment end positions) get
// extra storage from TokenMemAlloc() and are flagged TF_OVERHEAD.

TOKENID CLexer::ScanToken (CSTOKEN *pToken)
{
    WCHAR       ch, chQuote, chSurrogate = L'\0';
    PCWSTR      p = m_pszCurrent, pszHold = NULL, pszToken;
    BOOL        fReal = FALSE, fEscaped = FALSE, fAtPrefix = FALSE;

    // Initialize for new token scan
    pToken->iChar = pToken->iLine = 0;
    pToken->iUserByte = TID_INVALID;
    pToken->iUserBits = 0;

    // Start scanning the token
    while (pToken->iUserByte == TID_INVALID)
    {
        if (!PositionOf (p, pToken) && !m_fThisLineTooLong)
        {
            ErrorAtPosition (m_iCurLine, MAX_POS_LINE_LEN - 1, 1, ERR_LineTooLong, MAX_POS_LINE_LEN);
            m_fLimitExceeded = TRUE;
            m_fThisLineTooLong = TRUE;
        }

        pszToken = p;

        switch (ch = *p++)
        {
            case 0:
            {
                // Back up to point to the 0 again...
                p--;
                pToken->iUserByte = TID_ENDFILE;
                pToken->iLength = 0;
                break;
            }

            case '\t':
            case ' ':
            {
                // Tabs and spaces tend to roam in groups... scan them together
                while (*p == ' ' || *p == '\t')
                    p++;
                break;
            }

            case UCH_PS:
            case UCH_LS:
            case 0x0085:
            case '\n':
            {
                // This is a new line
                TrackLine (p);
                break;
            }

            case '\r':
            {
                // Bare CR's are lines, but CRLF pairs are considered a single line.
                if (*p == '\n')
                    p++;
                TrackLine (p);
                break;
            }

            // Other Whitespace characters
            case UCH_BOM:   // Unicode Byte-order marker
            case 0x001A:    // Ctrl+Z
            case '\v':      // Vertical Tab
            case '\f':      // Form-feed
            {
                break;
            }

            case '#':
            {
                p--;
                if (!ScanPreprocessorLine (p))
                {
                    ASSERT(!m_fPreproc);
                    p++;
                    ReportInvalidToken(pToken, pszToken, p);
                }
                break;
            }

            case '\"':
            case '\'':
            {
                CStringBuilder  sb;

                // "Normal" strings (double-quoted and single-quoted (char) literals).  We translate escape sequences
                // here, and construct the STRCONST (for strings) directly (char literals are encoded w/o overhead)
                chQuote = ch;
                while (*p != chQuote)
                {
                    WCHAR   c = *p++;

                    if (c == '\\')
                    {
                        WCHAR c2 = 0;
                        c = ScanEscapeSequence (p, &c2);

                        // We use a string building to construct the string constant's value.  Yes, CStringBuilder
                        // is equipped to deal with embedded nul characters.
                        sb.Append (c);
                        if (c2 != 0)
                            sb.Append (c2);
                    }
                    else if (IsEndOfLineChar (c) || c == 0)
                    {
                        ASSERT (p > pszToken);
                        p--;
                        ErrorAtPosition (m_iCurLine, (long)(pszToken - m_pszCurLine), (long)(p - pszToken), ERR_NewlineInConst);
                        pToken->iUserBits |= TF_UNTERMINATED;
                        break;
                    }
                    else
                    {
                        // We use a string building to construct the string constant's value.  Yes, CStringBuilder
                        // is equipped to deal with embedded nul characters.
                        sb.Append (c);
                    }
                }

                // Skip the terminating quote (if present)
                if ((pToken->iUserBits & TF_UNTERMINATED) == 0)
                    p++;

                if (chQuote == '\'')
                {
                    // This was a char literal -- no need to allocate overhead...
                    if (sb.GetLength() != 1)
                        ErrorAtPosition (m_iCurLine, (long)(pszToken - m_pszCurLine), (long)(p - pszToken), (sb.GetLength() != 0) ? ERR_TooManyCharsInConst : ERR_EmptyCharConst);

                    pToken->iUserByte = TID_CHARLIT;
                    pToken->chr.cCharValue = ((PCWSTR)sb)[0];
                    pToken->chr.iCharLen = (WCHAR)(p - pszToken);
                }
                else
                {
                    // This one requires special allocation.
                    pToken->iUserByte = TID_STRINGLIT;
                    pToken->iUserBits |= TF_OVERHEAD;
                    pToken->pStringLiteral = (STRLITERAL *)TokenMemAlloc (pToken, sizeof (STRLITERAL) + (sb.GetLength() * sizeof (WCHAR)));
                    pToken->pStringLiteral->iSourceLength = (long)(p - pszToken);
                    pToken->pStringLiteral->str.length = (long)sb.GetLength();
                    pToken->pStringLiteral->str.text = (WCHAR *)(pToken->pStringLiteral + 1);
                    memcpy (pToken->pStringLiteral->str.text, (PCWSTR)sb, pToken->pStringLiteral->str.length * sizeof (WCHAR));
                }
                break;
            }

            case '/':
            {
                // Lotsa things start with slash...
                switch (*p)
                {
                    case '/':
                    {
                        // Single-line comments...
                        bool    fDocComment = (p[1] == '/' && p[2] != '/');

                        // Find the end of the line, and make sure it's not too long (even for non-doc comments...)
                        while (*p != 0 && !IsEndOfLineChar (*p))
                        {
                            if (p - m_pszCurLine >= MAX_POS_LINE_LEN && !m_fThisLineTooLong)
                            {
                                ErrorAtPosition (m_iCurLine, MAX_POS_LINE_LEN - 1, 1, ERR_LineTooLong, MAX_POS_LINE_LEN);
                                m_fLimitExceeded = TRUE;
                                m_fThisLineTooLong = TRUE;
                            }

                            p++;
                        }

                        // Only put comments in the token stream if asked
                        if (RepresentNoiseTokens ())
                        {
                            if (fDocComment)
                            {
                                size_t cchToken = (p - pszToken);
                                size_t cchBuffer = cchToken + 1;
                                size_t cbBuffer = cchBuffer * sizeof(WCHAR);

                                // Doc comments require, ironically enough, overhead in the token stream.
                                pToken->iUserByte = TID_DOCCOMMENT;
                                pToken->iUserBits |= TF_OVERHEAD;
                                pToken->pDocLiteral = (DOCLITERAL *)TokenMemAlloc (pToken, sizeof (DOCLITERAL) + cbBuffer);
                                pToken->pDocLiteral->posEnd = POSDATA(m_iCurLine, (long)(p - m_pszCurLine));
                                wcsncpy_s (pToken->pDocLiteral->szText, cchBuffer, pszToken, cchToken);
                                pToken->pDocLiteral->szText[cchToken] = 0;
                            }
                            else
                            {
                                // No overhead incurred for single-line non-doc comments, but we do need the length.
                                pToken->iUserByte = TID_SLCOMMENT;
                                pToken->iLength = (long)(p - pszToken);
                            }
                        }
                        break;
                    }

                    case '*':
                    {
                        bool    fDocComment = (p[1] == '*' && p[2] != '*');
                        BOOL    fDone = FALSE;

                        // Multi-line comments...
                        p++;
                        while (!fDone)
                        {
                            if (*p == 0)
                            {
                                // The comment didn't end.  Report an error at the start point.
                                ErrorAtPosition (pToken->iLine, pToken->iChar, 2, ERR_OpenEndedComment);
                                if (RepresentNoiseTokens ())
                                    pToken->iUserBits |= TF_UNTERMINATED;
                                fDone = TRUE;
                                break;
                            }

                            if (*p == '*' && p[1] == '/')
                            {
                                p += 2;
                                break;
                            }

                            if (IsEndOfLineChar (*p))
                            {
                                if (*p == '\r' && p[1] == '\n')
                                    p++;
                                TrackLine (++p);
                            }
                            else
                            {
                                p++;
                            }
                        }

                        m_fFirstOnLine = FALSE;

                        if (RepresentNoiseTokens ())
                        {
                            pToken->iUserBits |= TF_OVERHEAD;
                            if (fDocComment)
                            {
                                // Doc comments require, ironically enough, overhead in the token stream.
                                size_t cchToken = (p - pszToken);
                                size_t cchBuffer = cchToken + 1; //+1 for null
                                size_t cbBuffer = cchBuffer * sizeof(WCHAR);

                                pToken->iUserByte = TID_MLDOCCOMMENT;
                                pToken->pDocLiteral = (DOCLITERAL *)TokenMemAlloc (pToken, sizeof (DOCLITERAL) + cbBuffer);
                                pToken->pDocLiteral->posEnd = POSDATA(m_iCurLine, (long)(p - m_pszCurLine));
                                wcsncpy_s (pToken->pDocLiteral->szText, cchBuffer, pszToken, cchToken);
                                pToken->pDocLiteral->szText[cchToken] = 0;
                                if (p - m_pszCurLine >= MAX_POS_LINE_LEN && !m_fThisLineTooLong)
                                {
                                    ErrorAtPosition (m_iCurLine, MAX_POS_LINE_LEN - 1, 1, ERR_LineTooLong, MAX_POS_LINE_LEN);
                                    m_fLimitExceeded = TRUE;
                                    m_fThisLineTooLong = TRUE;
                                }
                            }
                            else
                            {
                                // For multi-line comments, we don't put the text in but we do need the
                                // end position -- which means ML comments incur overhead...  :-(
                                pToken->iUserByte = TID_MLCOMMENT;
                                pToken->pposEnd = (POSDATA *)TokenMemAlloc (pToken, sizeof (POSDATA));
                                if (!PositionOf (p, pToken->pposEnd) && !m_fThisLineTooLong)
                                {
                                    ErrorAtPosition (m_iCurLine, MAX_POS_LINE_LEN - 1, 1, ERR_LineTooLong, MAX_POS_LINE_LEN);
                                    m_fLimitExceeded = TRUE;
                                    m_fThisLineTooLong = TRUE;
                                }
                            }
                        }
                        break;
                    }

                    case '=':
                    {
                        p++;
                        pToken->iUserByte = TID_SLASHEQUAL;
                        pToken->iLength = 2;
                        break;
                    }

                    default:
                    {
                        pToken->iUserByte = TID_SLASH;
                        pToken->iLength = 1;
                        break;
                    }
                }
                break;
            }

            case '.':
            {
                if (*p >= '0' && *p <= '9')
                {
                    p++;
                    ch = 0;
                    goto _parseNumber;
                }
                pToken->iUserByte = TID_DOT;
                pToken->iLength = 1;
                break;
            }

            case ',':
                pToken->iUserByte = TID_COMMA;
                pToken->iLength = 1;
                break;

            case ':':
                if (*p == ':')
                {
                    pToken->iUserByte = TID_COLONCOLON;
                    pToken->iLength = 2;
                    p++;
                }
                else
                {
                    pToken->iUserByte = TID_COLON;
                    pToken->iLength = 1;
                }
                break;

            case ';':
                pToken->iUserByte = TID_SEMICOLON;
                pToken->iLength = 1;
                break;

            case '~':
                pToken->iUserByte = TID_TILDE;
                pToken->iLength = 1;
                break;

            case '!':
            {
                if (*p == '=')
                {
                    pToken->iUserByte = TID_NOTEQUAL;
                    pToken->iLength = 2;
                    p++;
                }
                else
                {
                    pToken->iUserByte = TID_BANG;
                    pToken->iLength = 1;
                }
                break;
            }

            case '=':
            {
                if (*p == '=')
                {
                    pToken->iUserByte = TID_EQUALEQUAL;
                    pToken->iLength = 2;
                    p++;
                }
                else
                {
                    pToken->iUserByte = TID_EQUAL;
                    pToken->iLength = 1;
                }
                break;
            }

            case '*':
            {
                if (*p == '=')
                {
                    pToken->iUserByte = TID_SPLATEQUAL;
                    pToken->iLength = 2;
                    p++;
                }
                else
                {
                    pToken->iUserByte = TID_STAR;
                    pToken->iLength = 1;
                }
                break;
            }

            case '(':
            {
                pToken->iUserByte = TID_OPENPAREN;
                pToken->iLength = 1;
                break;
            }

            case ')':
            {
                pToken->iUserByte = TID_CLOSEPAREN;
                pToken->iLength = 1;
                break;
            }

            case '{':
            {
                pToken->iUserByte = TID_OPENCURLY;
                pToken->iLength = 1;
                break;
            }

            case '}':
            {
                pToken->iUserByte = TID_CLOSECURLY;
                pToken->iLength = 1;
                break;
            }

            case '[':
            {
                pToken->iUserByte = TID_OPENSQUARE;
                pToken->iLength = 1;
                break;
            }

            case ']':
            {
                pToken->iUserByte = TID_CLOSESQUARE;
                pToken->iLength = 1;
                break;
            }

            case '?':
            {
                if (*p == '?')
                {
                    p++;
                    pToken->iUserByte = TID_QUESTQUEST;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_QUESTION;
                    pToken->iLength = 1;
                }
                break;
            }

            case '+':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_PLUSEQUAL;
                    pToken->iLength = 2;
                }
                else if (*p == '+')
                {
                    p++;
                    pToken->iUserByte = TID_PLUSPLUS;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_PLUS;
                    pToken->iLength = 1;
                }
                break;
            }

            case '-':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_MINUSEQUAL;
                    pToken->iLength = 2;
                }
                else if (*p == '-')
                {
                    p++;
                    pToken->iUserByte = TID_MINUSMINUS;
                    pToken->iLength = 2;
                }
                else if (*p == '>')
                {
                    p++;
                    pToken->iUserByte = TID_ARROW;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_MINUS;
                    pToken->iLength = 1;
                }
                break;
            }

            case '%':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_MODEQUAL;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_PERCENT;
                    pToken->iLength = 1;
                }
                break;
            }

            case '&':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_ANDEQUAL;
                    pToken->iLength = 2;
                }
                else if (*p == '&')
                {
                    p++;
                    pToken->iUserByte = TID_LOG_AND;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_AMPERSAND;
                    pToken->iLength = 1;
                }
                break;
            }

            case '^':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_HATEQUAL;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_HAT;
                    pToken->iLength = 1;
                }
                break;
            }

            case '|':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_BAREQUAL;
                    pToken->iLength = 2;
                }
                else if (*p == '|')
                {
                    p++;
                    pToken->iUserByte = TID_LOG_OR;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_BAR;
                    pToken->iLength = 1;
                }
                break;
            }

            case '<':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_LESSEQUAL;
                    pToken->iLength = 2;
                }
                else if (*p == '<')
                {
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        pToken->iUserByte = TID_SHIFTLEFTEQ;
                        pToken->iLength = 3;
                    }
                    else
                    {
                        pToken->iUserByte = TID_SHIFTLEFT;
                        pToken->iLength = 2;
                    }
                }
                else
                {
                    pToken->iUserByte = TID_LESS;
                    pToken->iLength = 1;
                }
                break;
            }

            case '>':
            {
                if (*p == '=')
                {
                    p++;
                    pToken->iUserByte = TID_GREATEREQUAL;
                    pToken->iLength = 2;
                }
                else
                {
                    pToken->iUserByte = TID_GREATER;
                    pToken->iLength = 1;
                }
                break;
            }

            case '@':
            {
                if (*p == '"')
                {
                    CStringBuilder  sb;
                    BOOL            fDone = FALSE;
                    WCHAR           c;

                    // Verbatim string literal.  While scanning/accumulating its value into
                    // the string builder, track lines and ignore escape characters (they don't
                    // apply in VSL's) -- watch for double-quotes as well.
                    p++;
                    while (!fDone)
                    {
                        switch (c = *p++)
                        {
                            case UCH_PS:
                            case UCH_LS:
                            case 0x0085:
                            case '\n':
                            {
                                TrackLine (p);
                                break;
                            }

                            case '\r':
                            {
                                if (*p == '\n')
                                {
                                    sb.Append (c);
                                    c = *p++;
                                }
                                TrackLine (p);
                                break;
                            }

                            case '\"':
                            {
                                if (*p == '\"')
                                    p++;            // Doubled quote -- skip & put the single quote in the string
                                else
                                    fDone = TRUE;
                                break;
                            }

                            case 0:
                            {
                                // Reached the end of the source without finding the end-quote.  Give
                                // an error back at the starting point.
                                ErrorAtPosition (pToken->iLine, pToken->iChar, 2, ERR_UnterminatedStringLit);
                                pToken->iUserBits |= TF_UNTERMINATED;
                                fDone = TRUE;
                                p--;
                                break;
                            }

                            default:
                                ASSERT(!IsEndOfLineChar(c));
                                break;
                        }

                        if (!fDone)
                            sb.Append (c);
                    }

                    pToken->iUserByte = TID_VSLITERAL;
                    pToken->iUserBits |= TF_OVERHEAD;
                    pToken->pVSLiteral = (VSLITERAL *)TokenMemAlloc (pToken, sizeof (VSLITERAL) + (sb.GetLength() * sizeof (WCHAR)));
                    PositionOf (p, &pToken->pVSLiteral->posEnd);
                    pToken->pVSLiteral->str.length = (long)sb.GetLength();
                    pToken->pVSLiteral->str.text = (WCHAR *)(pToken->pVSLiteral + 1);
                    memcpy (pToken->pVSLiteral->str.text, (PCWSTR)sb, sb.GetLength() * sizeof (WCHAR));
                    break;
                }

                // Check for identifiers.  NOTE: unicode escapes are allowed here!
                ch = PeekChar(p, &chSurrogate);
                if (!IsIdentifierChar (ch)) // BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                {
                    // After the '@' we have neither an identifier nor and string quote, so assume it is an identifier.
                    CreateInvalidToken(pToken, pszToken, p);
                    ErrorAtPosition (m_iCurLine, (long)(pszToken - m_pszCurLine), (long)(p - pszToken), ERR_ExpectedVerbatimLiteral);
                    break;
                }

                ch = NextChar(p, &chSurrogate);
                fAtPrefix = TRUE;
                goto _ParseIdentifier;  // (Goto avoids the IsSpaceSeparator() check and the redundant IsIdentifierChar() check below...)
            }

            case '\\':
                // Could be unicode escape. Try that.
                --p;
                ch = NextChar (p, &chSurrogate);

                // If we had a unicode escape, ch is it. If we didn't, ch is still a backslash. Unicode escape
                // must start an identifers, so check only for identifiers now.
                goto _CheckIdentifier;

            default:
                ASSERT(!IsEndOfLineChar(ch));
                if (IsSpaceSeparator (ch))    // Unicode class 'Zs'
                {
                    while (IsSpaceSeparator(*p))
                        p++;
                    break;
                }
_CheckIdentifier:
                if (!IsIdentifierChar (ch)) // BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                {
                    ReportInvalidToken(pToken, pszToken, p);
                    break;
                }
                // Fall through case.  All the 'common' identifier characters are represented directly in
                // these switch cases for optimal perf.  Calling IsIdentifierChar() functions is relatively
                // expensive.
            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i':
            case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
            case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I':
            case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
            case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
            case '_':
_ParseIdentifier:
            {
                CStringBuilder  sb;
                bool            doubleUnderscore = false;

                // Remember, because we're processing identifiers here, unicode escape sequences are
                // allowed and must be handled
                sb.Append (ch);
                if (chSurrogate)
                    sb.Append(chSurrogate);

                do
                {
                    ch = PeekChar (p, &chSurrogate);
                    switch (ch)
                    {
                        case '_':
                            // Common identifier character, but we need check for double consecutive underscores
                            if (!doubleUnderscore && ((PWSTR)sb)[sb.GetLength() - 1] == '_')
                                doubleUnderscore = true;
                            break;

                        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I':
                        case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
                        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
                        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i':
                        case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
                        case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
                        case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8':
                        case '9':
                        {
                            // Again, these are the 'common' identifier characters...
                            break;
                        }

                        case ' ': case '\t': case '.': case ';': case '(': case ')': case ',':
                        {
                            // ...and these are the 'common' stop characters.
                            goto LoopExit;
                        }

                        default:
                        {
                            // This is the 'expensive' call
                            if (IsIdentifierCharOrDigit (ch)) // BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                            {
                                if (IsOtherFormat (ch))
                                {
                                    goto SkipChar; // Ignore formatting characters
                                }
                            }
                            else
                            {
                                // Not a valid identifier character, so bail.
                                goto LoopExit;
                            }
                        }
                    }

                    sb.Append (ch);
                    if (chSurrogate)
                        sb.Append(chSurrogate);

SkipChar:
                    ch = NextChar (p, &chSurrogate);
                }
                while (ch);

LoopExit:
                HRESULT hr;
                if (!SUCCEEDED(hr = sb.GetResultCode()))
                {
                    m_hr = hr;
                    return TID_INVALID;
                }

                PCWSTR pszName = sb;
                long iLength = (long)sb.GetLength();

                // "escaped" means there was an @ prefix, or there was a unicode escape -- both of which
                // indicate overhead, since the identifier length will not be equal to the token length
                fEscaped = (fAtPrefix || (p - pszToken > iLength));

                if (sb.GetLength() >= MAX_IDENT_SIZE)
                {
                    ErrorAtPosition (m_iCurLine, (long)(pszToken - m_pszCurLine), (long)(p - pszToken), ERR_IdentifierTooLong);
                    iLength = MAX_IDENT_SIZE - 1;
                }

                int     iKeyword;

                // Add the identifier to the name table
                pToken->pName = m_pNameMgr->AddString (pszName, iLength);

                // ...and check to see if it is a keyword, if appropriate
                if (fEscaped || !m_pNameMgr->IsNameKeyword (pToken->pName, m_eKeywordMode, &iKeyword))
                {
                    pToken->iUserByte = TID_IDENTIFIER;

                    if (doubleUnderscore && !fAtPrefix && m_eKeywordMode == CompatibilityECMA1)
                    {
                        ErrorAtPosition (m_iCurLine, (long)(pszToken - m_pszCurLine), (long)(p - pszToken), ERR_ReservedIdentifier, pToken->pName->text);
                    }

                    if (fEscaped)
                    {
                        NAME    *pName = pToken->pName;     // Hold this so assignment to pEscName doesn't whack it

                        pToken->iUserBits |= TF_OVERHEAD;
                        pToken->pEscName = (ESCAPEDNAME *)TokenMemAlloc (pToken, sizeof (ESCAPEDNAME));
                        pToken->pEscName->iLen = (long)(p - pszToken);
                        pToken->pEscName->pName = pName;
                    }
                }
                else
                {
                    pToken->iUserByte = iKeyword;
                    pToken->iLength = iLength;
                }

                if (fAtPrefix)
                {
                    pToken->iUserBits |= TF_VERBATIMSTRING; // We need to know this later
                }

                break;
            }

            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            {
                BOOL fHexNumber;
                if ((fHexNumber = (ch == '0' && (*p == 'x' || *p == 'X'))))
                {
                    // it's a hex constant
                    p++;

                    // It's OK if it has no digits after the '0x' -- we'll catch it in ScanNumericLiteral
                    // and give a proper error then.
                    while (*p <= 'f' && isxdigit (*p))
                        p++;

                    if (*p == 'L' || *p == 'l')
                    {
                        p++;
                        if (*p == 'u' || *p == 'U')
                            p++;
                    }
                    else if (*p == 'u' || *p == 'U')
                    {
                        p++;
                        if (*p == 'L' || *p == 'l')
                            p++;
                    }
                }
                else
                {
                    // skip digits
                    while (*p >= '0' && *p <= '9')
                        p++;

                    if (*p == '.')
                    {
                        pszHold = p++;
                        if (*p >= '0' && *p <= '9')
                        {
                            // skip digits after decimal point
                            p++;
_parseNumber:
                            fHexNumber = false;
                            fReal = TRUE;
                            while (*p >= '0' && *p <= '9')
                                p++;
                        }
                        else
                        {
                            // Number + dot + non-digit -- these are separate tokens, so don't absorb the
                            // dot token into the number.
                            p = pszHold;
                            size_t cchToken = (p - pszToken);
                            size_t cchBuffer = cchToken + 1;
                            size_t cbBuffer = cchBuffer * sizeof(WCHAR);

                            pToken->iUserByte = TID_NUMBER;
                            pToken->iUserBits |= TF_OVERHEAD;
                            pToken->pLiteral = (LITERAL *)TokenMemAlloc (pToken, sizeof (LITERAL) + cbBuffer);
                            pToken->pLiteral->iSourceLength = (long)cchToken;
                            wcsncpy_s (pToken->pLiteral->szText, cchBuffer, pszToken, cchToken);

                            // The terminator belongs at index cchToken, directly after the copied
                            // text (the same index every other literal path uses).  Index cchBuffer
                            // is one element further on and would leave szText[cchToken] unset.
                            pToken->pLiteral->szText[cchToken] = 0;
                            break;
                        }
                    }

                    if (*p == 'E' || *p == 'e')
                    {
                        fReal = TRUE;

                        // skip exponent
                        p++;
                        if (*p == '+' || *p == '-')
                            p++;

                        while (*p >= '0' && *p <= '9')
                            p++;
                    }

                    if (fReal)
                    {
                        if (*p == 'f' || *p == 'F' || *p == 'D' || *p == 'd' || *p == 'm' || *p == 'M')
                            p++;
                    }
                    else if (*p == 'F' || *p == 'f' || *p == 'D' || *p == 'd' || *p == 'm' || *p == 'M')
                    {
                        p++;
                    }
                    else if (*p == 'L' || *p == 'l')
                    {
                        p++;
                        if (*p == 'u' || *p == 'U')
                            p++;
                    }
                    else if (*p == 'u' || *p == 'U')
                    {
                        p++;
                        if (*p == 'L' || *p == 'l')
                            p++;
                    }
                }

                size_t cchToken = (p - pszToken);
                size_t cchBuffer = cchToken + 1;
                size_t cbBuffer = cchBuffer * sizeof (WCHAR);

                pToken->iUserByte = TID_NUMBER;
                pToken->iUserBits |= TF_OVERHEAD;
                if (fHexNumber)
                    pToken->iUserBits |= TF_HEXLITERAL;
                pToken->pLiteral = (LITERAL *)TokenMemAlloc (pToken, sizeof (LITERAL) + cbBuffer);
                pToken->pLiteral->iSourceLength = (long)(cchToken);
                wcsncpy_s (pToken->pLiteral->szText, cchBuffer, pszToken, cchToken);
                pToken->pLiteral->szText[cchToken] = 0;
                break;
            }
        } // switch
    } // while

    m_pszCurrent = p;
    m_fFirstOnLine = FALSE;

    // Remember once the first token not flagged TFF_NOISE has been produced.
    if (!m_fTokensSeen)
        m_fTokensSeen = ((CParser::m_rgTokenInfo[pToken->Token()].dwFlags & TFF_NOISE) == 0);
    return pToken->Token();
}
// Gets the next token from the input stream, advancing the variables which keep track of the current input position and line. bool FBaseParser::GetToken( FToken& Token, bool bNoConsts/*=false*/, ESymbolParseOption bParseTemplateClosingBracket/*=ESymbolParseOption::Normal*/ ) { Token.TokenName = NAME_None; TCHAR c = GetLeadingChar(); TCHAR p = PeekChar(); if( c == 0 ) { UngetChar(); return 0; } Token.StartPos = PrevPos; Token.StartLine = PrevLine; if( (c>='A' && c<='Z') || (c>='a' && c<='z') || (c=='_') ) { // Alphanumeric token. int32 Length=0; do { Token.Identifier[Length++] = c; if( Length >= NAME_SIZE ) { FError::Throwf(TEXT("Identifer length exceeds maximum of %i"), (int32)NAME_SIZE); Length = ((int32)NAME_SIZE) - 1; break; } c = GetChar(); } while( ((c>='A')&&(c<='Z')) || ((c>='a')&&(c<='z')) || ((c>='0')&&(c<='9')) || (c=='_') ); UngetChar(); Token.Identifier[Length]=0; // Assume this is an identifier unless we find otherwise. Token.TokenType = TOKEN_Identifier; // Lookup the token's global name. Token.TokenName = FName( Token.Identifier, FNAME_Find, true ); // If const values are allowed, determine whether the identifier represents a constant if ( !bNoConsts ) { // See if the identifier is part of a vector, rotation or other struct constant. // boolean true/false if( Token.Matches(TEXT("true")) ) { Token.SetConstBool(true); return true; } else if( Token.Matches(TEXT("false")) ) { Token.SetConstBool(false); return true; } } return true; } // if const values are allowed, determine whether the non-identifier token represents a const else if ( !bNoConsts && ((c>='0' && c<='9') || ((c=='+' || c=='-') && (p>='0' && p<='9'))) ) { // Integer or floating point constant. 
bool bIsFloat = 0; int32 Length = 0; bool bIsHex = 0; do { if( c==TEXT('.') ) { bIsFloat = true; } if( c==TEXT('X') || c == TEXT('x') ) { bIsHex = true; } Token.Identifier[Length++] = c; if( Length >= NAME_SIZE ) { FError::Throwf(TEXT("Number length exceeds maximum of %i "), (int32)NAME_SIZE ); Length = ((int32)NAME_SIZE) - 1; break; } c = FChar::ToUpper(GetChar()); } while ((c >= TEXT('0') && c <= TEXT('9')) || (!bIsFloat && c == TEXT('.')) || (!bIsHex && c == TEXT('X')) || (bIsHex && c >= TEXT('A') && c <= TEXT('F'))); Token.Identifier[Length]=0; if (!bIsFloat || c != 'F') { UngetChar(); } if (bIsFloat) { Token.SetConstFloat( FCString::Atof(Token.Identifier) ); } else if (bIsHex) { TCHAR* End = Token.Identifier + FCString::Strlen(Token.Identifier); Token.SetConstInt( FCString::Strtoi(Token.Identifier,&End,0) ); } else { Token.SetConstInt( FCString::Atoi(Token.Identifier) ); } return true; } else if (c == '\'') { TCHAR ActualCharLiteral = GetChar(/*bLiteral=*/ true); if (ActualCharLiteral == '\\') { ActualCharLiteral = GetChar(/*bLiteral=*/ true); switch (ActualCharLiteral) { case TCHAR('t'): ActualCharLiteral = '\t'; break; case TCHAR('n'): ActualCharLiteral = '\n'; break; case TCHAR('r'): ActualCharLiteral = '\r'; break; } } c = GetChar(/*bLiteral=*/ true); if (c != '\'') { FError::Throwf(TEXT("Unterminated character constant")); UngetChar(); } Token.SetConstChar(ActualCharLiteral); return true; } else if (c == '"') { // String constant. TCHAR Temp[MAX_STRING_CONST_SIZE]; int32 Length=0; c = GetChar(/*bLiteral=*/ true); while( (c!='"') && !IsEOL(c) ) { if( c=='\\' ) { c = GetChar(/*bLiteral=*/ true); if( IsEOL(c) ) { break; } else if(c == 'n') { // Newline escape sequence. 
c = '\n'; } } Temp[Length++] = c; if( Length >= MAX_STRING_CONST_SIZE ) { FError::Throwf(TEXT("String constant exceeds maximum of %i characters"), (int32)MAX_STRING_CONST_SIZE ); c = TEXT('\"'); Length = ((int32)MAX_STRING_CONST_SIZE) - 1; break; } c = GetChar(/*bLiteral=*/ true); } Temp[Length]=0; if( c != '"' ) { FError::Throwf(TEXT("Unterminated string constant: %s"), Temp); UngetChar(); } Token.SetConstString(Temp); return true; } else { // Symbol. int32 Length=0; Token.Identifier[Length++] = c; // Handle special 2-character symbols. #define PAIR(cc,dd) ((c==cc)&&(d==dd)) /* Comparison macro for convenience */ TCHAR d = GetChar(); if ( PAIR('<','<') || (PAIR('>','>') && (bParseTemplateClosingBracket != ESymbolParseOption::CloseTemplateBracket)) || PAIR('!','=') || PAIR('<','=') || PAIR('>','=') || PAIR('+','+') || PAIR('-','-') || PAIR('+','=') || PAIR('-','=') || PAIR('*','=') || PAIR('/','=') || PAIR('&','&') || PAIR('|','|') || PAIR('^','^') || PAIR('=','=') || PAIR('*','*') || PAIR('~','=') || PAIR(':',':') ) { Token.Identifier[Length++] = d; if( c=='>' && d=='>' ) { if( GetChar()=='>' ) Token.Identifier[Length++] = '>'; else UngetChar(); } } else UngetChar(); #undef PAIR Token.Identifier[Length] = 0; Token.TokenType = TOKEN_Symbol; // Lookup the token's global name. Token.TokenName = FName( Token.Identifier, FNAME_Find, true ); return true; } }
// // Skip past all spaces and tabs in the input stream. // TCHAR FBaseParser::GetLeadingChar() { TCHAR TrailingCommentNewline = 0; for (;;) { bool MultipleNewlines = false; TCHAR c; // Skip blanks. do { c = GetChar(); // Check if we've encountered another newline since the last one if (c == TrailingCommentNewline) { MultipleNewlines = true; } } while (IsWhitespace(c)); if (c != TEXT('/') || PeekChar() != TEXT('/')) { return c; } // Clear the comment if we've encountered newlines since the last comment if (MultipleNewlines) { ClearComment(); } // Record the first slash. The first iteration of the loop will get the second slash. PrevComment += c; do { c = GetChar(true); if (c == 0) return c; PrevComment += c; } while (!IsEOL(c)); TrailingCommentNewline = c; for (;;) { c = GetChar(); if (c == 0) return c; if (c == TrailingCommentNewline || !IsEOL(c)) { UngetChar(); break; } PrevComment += c; } } }
// // Get a single character from the input stream and return it, or 0=end. // TCHAR FBaseParser::GetChar(bool bLiteral) { bool bInsideComment = false; PrevPos = InputPos; PrevLine = InputLine; Loop: const TCHAR c = Input[InputPos++]; if (bInsideComment) { // Record the character as a comment. PrevComment += c; } if (c == TEXT('\n')) { InputLine++; } else if (!bLiteral) { const TCHAR NextChar = PeekChar(); if ( c==TEXT('/') && NextChar==TEXT('*') ) { if (!bInsideComment) { ClearComment(); // Record the slash and star. PrevComment += c; PrevComment += NextChar; bInsideComment = true; // Move past the star. Do it only when not in comment, // otherwise end of comment might be missed e.g. // /*/ Comment /*/ // ~~~~~~~~~~~~~^ Will report second /* as beginning of comment // And throw error that end of file is found in comment. InputPos++; } goto Loop; } else if( c==TEXT('*') && NextChar==TEXT('/') ) { if (!bInsideComment) { ClearComment(); FError::Throwf(TEXT("Unexpected '*/' outside of comment") ); } /** Asterisk and slash always end comment. */ bInsideComment = false; // Star already recorded; record the slash. PrevComment += Input[InputPos]; InputPos++; goto Loop; } } if (bInsideComment) { if (c == 0) { ClearComment(); FError::Throwf(TEXT("End of class header encountered inside comment") ); } goto Loop; } return c; }