// get a parsed line. // if no more lines exist, returns false bool Parser::getLine(std::vector<Token> *o_tokens) { o_tokens->clear(); m_lineNumber = m_internalLineNumber; tstringi line; bool isTokenExist = false; continue_getLineLoop: while (getLine(&line)) { const _TCHAR *t = line.c_str(); continue_getTokenLoop: while (true) { // skip white space while (*t != _T('\0') && _istspace(*t)) t ++; if (*t == _T('\0') || *t == _T('#')) goto break_getTokenLoop; // no more tokens exist if (*t == _T('\\') && *(t + 1) == _T('\0')) goto continue_getLineLoop; // continue to next line const _TCHAR *tokenStart = t; // comma or empty token if (*t == _T(',')) { if (!isTokenExist) o_tokens->push_back(Token(_T(""), false)); isTokenExist = false; o_tokens->push_back(Token(Token::Type_comma)); t ++; goto continue_getTokenLoop; } // paren if (*t == _T('(')) { o_tokens->push_back(Token(Token::Type_openParen)); isTokenExist = false; t ++; goto continue_getTokenLoop; } if (*t == _T(')')) { if (!isTokenExist) o_tokens->push_back(Token(_T(""), false)); isTokenExist = true; o_tokens->push_back(Token(Token::Type_closeParen)); t ++; goto continue_getTokenLoop; } isTokenExist = true; // prefix if (m_prefixes) for (size_t i = 0; i < m_prefixes->size(); i ++) if (_tcsnicmp(tokenStart, m_prefixes->at(i).c_str(), m_prefixes->at(i).size()) == 0) { o_tokens->push_back(Token(m_prefixes->at(i), false)); t += m_prefixes->at(i).size(); goto continue_getTokenLoop; } // quoted or regexp if (*t == _T('"') || *t == _T('\'') || *t == _T('/') || (*t == _T('\\') && *(t + 1) == _T('m') && *(t + 2) != _T('\0'))) { bool isRegexp = !(*t == _T('"') || *t == _T('\'')); _TCHAR q[2] = { *t++, _T('\0') }; // quote character if (q[0] == _T('\\')) { t++; q[0] = *t++; } tokenStart = t; while (*t != _T('\0') && *t != q[0]) { if (*t == _T('\\') && *(t + 1)) t ++; if (_istlead(*t) && *(t + 1)) t ++; t ++; } tstring str = interpretMetaCharacters(tokenStart, t - tokenStart, q, isRegexp); #ifdef _MBCS if (isRegexp) str = 
guardRegexpFromMbcs(str.c_str()); #endif // concatinate continuous string if (!isRegexp && 0 < o_tokens->size() && o_tokens->back().isString() && o_tokens->back().isQuoted()) o_tokens->back().add(str); else o_tokens->push_back(Token(str, true, isRegexp)); if (*t != _T('\0')) t ++; goto continue_getTokenLoop; } // not quoted { while (isSymbolChar(*t)) { if (*t == _T('\\')) if (*(t + 1)) t ++; else break; if (_istlead(*t) && *(t + 1)) t ++; t ++; } if (t == tokenStart) { ErrorMessage e; e << _T("invalid character "); #ifdef UNICODE e << _T("U+"); e << std::hex; // << std::setw(4) << std::setfill(_T('0')); e << (int)(wchar_t)*t; #else e << _T("\\x"); e << std::hex; // << std::setw(2) << std::setfill(_T('0')); e << (int)(u_char)*t; #endif e << std::dec; if (_istprint(*t)) e << _T("(") << *t << _T(")"); throw e; } _TCHAR *numEnd = NULL; long value = _tcstol(tokenStart, &numEnd, 0); if (tokenStart == numEnd) { tstring str = interpretMetaCharacters(tokenStart, t - tokenStart); o_tokens->push_back(Token(str, false)); } else { o_tokens->push_back( Token(value, tstringi(tokenStart, numEnd - tokenStart))); t = numEnd; } goto continue_getTokenLoop; } } break_getTokenLoop: if (0 < o_tokens->size()) break; m_lineNumber = m_internalLineNumber; isTokenExist = false; } return 0 < o_tokens->size(); }
/*
 * Scan the next token from the input and return its type.
 *
 * Blanks and comments (text enclosed in double quotes) are skipped first.
 * The token type is stored in m_currentToken and also returned; the token
 * text is stored in m_tokenString.  Depending on the type, m_tokenInteger
 * (intconst, charconst) or m_tokenFloat (floatconst) is set as well.
 *
 * Token types produced here: inputend, nameconst, namecolon, intconst,
 * floatconst, charconst, arraybegin, symconst, strconst, closing, binary.
 *
 * This relies on nextChar() leaving the character it returns in m_cc and
 * returning 0 at end of input, and on pushBack() un-reading characters in
 * LIFO order -- presumably how the helpers behave; confirm against their
 * definitions.
 *
 * Arguments to the <cctype> classifiers are cast to unsigned char because
 * passing a negative char value to them is undefined behaviour.
 */
tokentype Lexer::nextToken()
{
  std::string strToken;

  /* skip over blanks and comments */
  while (nextChar() &&
         (isspace(static_cast<unsigned char>(m_cc)) || (m_cc == '"'))) {
    if (m_cc == '"') {
      /* read comment */
      while (nextChar() && (m_cc != '"'))
        ;
      if (! m_cc)
        break;			/* break if we run into eof */
    }
  }

  /* NOTE(review): at end of input m_cc is 0, so the token text becomes a
     single NUL character; kept as is because callers may rely on it. */
  strToken.push_back(m_cc);

  if (! m_cc)			/* end of input */
    m_currentToken = inputend;

  else if (isalpha(static_cast<unsigned char>(m_cc))) {	/* identifier */
    while (nextChar() && isalnum(static_cast<unsigned char>(m_cc)))
      strToken.push_back(m_cc);
    if (m_cc == ':') {
      /* a trailing colon is part of the token */
      strToken.push_back(m_cc);
      m_currentToken = namecolon;
    } else {
      pushBack(m_cc);
      m_currentToken = nameconst;
    }
  }

  else if (isdigit(static_cast<unsigned char>(m_cc))) {	/* number */
    /* NOTE(review): a very long digit string can overflow longresult. */
    long longresult = m_cc - '0';
    while (nextChar() && isdigit(static_cast<unsigned char>(m_cc))) {
      strToken.push_back(m_cc);
      longresult = (longresult * 10) + (m_cc - '0');
    }
    if (longCanBeInt(longresult)) {
      m_tokenInteger = longresult;
      m_currentToken = intconst;
    } else {
      m_currentToken = floatconst;
      m_tokenFloat = (double) longresult;
    }

    if (m_cc == '.') {		/* possible float */
      if (nextChar() && isdigit(static_cast<unsigned char>(m_cc))) {
        strToken.push_back('.');
        do
          strToken.push_back(m_cc);
        while (nextChar() && isdigit(static_cast<unsigned char>(m_cc)));
        if (m_cc)
          pushBack(m_cc);
        m_currentToken = floatconst;
        /* No explicit terminator is appended: c_str() is already
           NUL-terminated, and an embedded NUL would hide an exponent
           appended below from atof() and leak into m_tokenString. */
        m_tokenFloat = atof(strToken.c_str());
      } else {
        /* nope, just an ordinary period */
        if (m_cc)
          pushBack(m_cc);
        pushBack('.');
      }
    } else
      pushBack(m_cc);

    if (nextChar() && m_cc == 'e') {	/* possible float */
      bool sign = false;		/* true if the exponent is negative */
      if (nextChar() && m_cc == '-') {
        sign = true;
        nextChar();
      }
      if (m_cc && isdigit(static_cast<unsigned char>(m_cc))) {
        /* yep, it's a float: re-parse mantissa and exponent together */
        strToken.push_back('e');
        if (sign)
          strToken.push_back('-');
        while (m_cc && isdigit(static_cast<unsigned char>(m_cc))) {
          strToken.push_back(m_cc);
          nextChar();
        }
        if (m_cc)
          pushBack(m_cc);
        m_currentToken = floatconst;
        m_tokenFloat = atof(strToken.c_str());
      } else {
        /* nope, wrong again: un-read everything consumed after the number */
        if (m_cc)
          pushBack(m_cc);
        if (sign)
          pushBack('-');
        pushBack('e');
      }
    } else if (m_cc)
      pushBack(m_cc);
  }

  else if (m_cc == '$') {	/* character constant */
    m_tokenInteger = (int) nextChar();
    m_currentToken = charconst;
  }

  else if (m_cc == '#') {	/* symbol */
    strToken.resize(strToken.size() - 1);	/* erase pound sign */
    if (nextChar() == '(')
      m_currentToken = arraybegin;
    else {
      pushBack(m_cc);
      while (nextChar() && isSymbolChar(m_cc))
        strToken.push_back(m_cc);
      pushBack(m_cc);
      m_currentToken = symconst;
    }
  }

  else if (m_cc == '\'') {	/* string constant */
    strToken.resize(strToken.size() - 1);	/* erase opening quote */
    for (;;) {
      while (nextChar() && (m_cc != '\''))
        strToken.push_back(m_cc);
      /* a doubled quote mark stands for one literal quote */
      if (m_cc && nextChar() && (m_cc == '\'')) {
        strToken.push_back(m_cc);
        continue;
      }
      break;
    }
    pushBack(m_cc);
    m_currentToken = strconst;
  }

  else if (isClosing(m_cc))	/* closing expressions */
    m_currentToken = closing;

  else if (singleBinary(m_cc))	/* single binary expressions */
    m_currentToken = binary;

  else {			/* anything else is binary */
    if (nextChar() && binarySecond(m_cc))
      strToken.push_back(m_cc);
    else
      pushBack(m_cc);
    m_currentToken = binary;
  }

  m_tokenString = strToken;
  return m_currentToken;
}