// Removes trailing whitespace in place: shrinks _len past every trailing
// character for which isWS() is true, then re-terminates the buffer at the
// new length.
void GString::TrimRightWS()
{
    for (; _len != 0; --_len)
    {
        if (!isWS(_str[_len - 1]))
            break;
    }
    _str[_len] = 0;
}
// Removes leading whitespace in place: counts the leading characters for
// which isWS() is true and, if there are any, shifts the remainder of the
// buffer (terminator included) down to the front.
void GString::TrimLeftWS()
{
    int lead = 0;
    while (lead < _len && isWS(_str[lead]))
        ++lead;

    if (lead == 0)
        return; // nothing to strip

    _len -= lead;
    // _len + 1 so the terminating zero moves along with the text.
    memmove(_str, _str + lead, _len + 1);
}
// Reads one line from the stream buffer m_sb and splits it into m_fields.
//
// Fields are separated by whitespace (as decided by isWS). A double-quoted
// section is appended to the current field verbatim, whitespace included;
// inside quotes, a doubled quote ("") stands for one literal quote character.
// An empty field is never stored.
//
// Returns true when the call produced at least one field or consumed a line
// terminator (so a blank line yields true with no fields), and false once the
// input is exhausted with nothing read.
//
// Throws std::runtime_error if a newline is met inside a quoted section.
// Reaching end of input inside a quoted section is not reported: the field
// simply ends there.
bool CueTokenizer<CharT>::nextline()
{
    m_fields.clear();

    int_type c;
    std::basic_string<CharT> field;

    while (traits_type::not_eof(c = m_sb->sbumpc()))
    {
        if (c == '"')
        {
            // eat until closing quote
            while (traits_type::not_eof(c = m_sb->sbumpc()))
            {
                if (c == '\n')
                    throw std::runtime_error(format(
                        "Runaway string at line %d", m_lineno + 1));
                else if (c != '"')
                    field.push_back(c);
                else if (m_sb->sgetc() != '"') // closing quote
                    break;
                else
                {
                    // escaped quote: drop the second '"' and keep one
                    m_sb->snextc();
                    field.push_back(c);
                }
            }
        }
        else if (c == '\n')
        {
            ++m_lineno;
            break;
        }
        else if (isWS(c))
        {
            if (field.size())
            {
                m_fields.push_back(field);
                field.clear();
            }
            // Skip the rest of the whitespace run, but never step over the
            // line terminator: it has to reach the '\n' branch above so the
            // line ends here and m_lineno is counted. (isWS is defined
            // elsewhere; the explicit check is a no-op if it already rejects
            // '\n'.)
            for (;;)
            {
                const int_type peek = m_sb->sgetc();
                if (peek == '\n' || !isWS(peek))
                    break;
                m_sb->snextc();
            }
        }
        else
            field.push_back(c);
    }

    if (field.size())
        m_fields.push_back(field);

    // Test m_fields rather than the last pending field: a final line that
    // ends in whitespace and then EOF (no '\n') has already stored its fields
    // and left `field` empty, and must still be reported as a line.
    return !m_fields.empty() || c == '\n';
}
//! Checks if there is whitespace in the range specified in the string inline std::string findComment(const ExprNode& node) { const Expression& expr = *node.expr(); typedef std::vector<std::pair<int, int> > Comments; const Comments& comments = expr.getComments(); const std::string& s = expr.getExpr(); // TODO: user lower_bound to make this O(lg n) instead of O(n) for (Comments::const_iterator i = comments.begin(); i != comments.end(); ++i) { if (i->first >= node.endPos() && isWS(s.c_str(), node.endPos(), i->first)) return s.substr(i->first, i->second - i->first + 1); } return ""; }
// Reads the next token from the input into obj.
//
// Recognition order:
//   1. character / string literals ('x' or "text"; no escape sequences, and
//      a literal may not span lines),
//   2. operators (dfaOperators), then keywords (dfaKeywords),
//   3. integer and float literals,
//   4. identifiers (lower-cased; must start with a letter, may contain
//      letters, digits and '_', at most 20 characters).
//
// Returns true with obj set to the recognized token. Returns false once the
// input is exhausted, with obj set to a LEX_ERROR token carrying "EOF".
//
// Throws a Token of type LEX_ERROR (thrown by value) whose data holds a
// message describing the malformed input.
bool Lexer::GetToken(Token&obj)
{
    string strBuf;
    DataValue dv;

    if (!isEOF())
        skipWS();

    // Checked again after skipWS(): input that ends in whitespace has nothing
    // left to lex and must take the EOF path rather than fall into the
    // number/identifier code with an empty buffer.
    if (isEOF())
    {
        //int 0 means eof
        dv.SetStrData("EOF");
        obj=Token(Token::LEX_ERROR,dv);
        return false;
    }

    /////////////
    // Check for chr/strlit
    //
    if (nextChar() == '\'' || nextChar() == '\"')
    {
        const char quote = getChar(); // opening quote, also the terminator
        while (!isEOF())
        {
            if (nextChar() == '\r' || nextChar() == '\n')
            {
                dv.SetStrData("Newline in constant.");
                throw Token(Token::LEX_ERROR,dv);
            }
            if (nextChar() == quote)
            {
                getChar(); // consume the closing quote
                if (quote == '\'')
                {
                    if (strBuf.length() != 1)
                    {
                        dv.SetStrData("Character literals must have exactly one character.");
                        throw Token(Token::LEX_ERROR,dv);
                    }
                    dv.SetCharData(strBuf[0]);
                    obj=Token(Token::LEX_CHRLIT,dv);
                    return true;
                }
                dv.SetStrData(strBuf);
                obj=Token(Token::LEX_STRLIT,dv);
                return true;
            }
            strBuf += getChar();
        }
        dv.SetStrData("EOF in constant.");
        throw Token(Token::LEX_ERROR,dv);
    }

    /////////////
    // Check for Operators, then keywords
    //
    // GetString() yields a non-zero token id on a match and 0 otherwise. The
    // read position is rewound after a failed operator match before the
    // keyword matcher is tried.
    StorePosition();
    int id = dfaOperators.GetString(strBuf);
    if (id == 0)
    {
        ResetPosition();
        id = dfaKeywords.GetString(strBuf);
    }
    if (id != 0)
    {
        dv.Clear();
        obj=Token(static_cast<Token::TokenType>(id),dv);
        return true;
    }

    /////////////
    // Check for numbers/idents
    //
    // Collect characters up to whitespace or the start of an operator.
    // NOTE(review): if nothing is collected here (e.g. the next character can
    // start an operator but no operator matched), strBuf may still be empty
    // and the integer branch below will accept it as 0 - confirm against the
    // DFA's behaviour.
    while (!isEOF() && !isWS(nextChar()) && !dfaOperators.ValidFirst(nextChar()))
    {
        strBuf += toLower(getChar());
    }

    //if it's all numbers
    if (strBuf.find_first_not_of("0123456789",0,10) == string::npos)
    {
        //read in anything, including dots (will cover floats and invalid idents)
        while (!isEOF() && !isWS(nextChar())
               && (nextChar() == '.' || !dfaOperators.ValidFirst(nextChar())))
        {
            strBuf += toLower(getChar());
        }
    }

    if (strBuf.find_first_not_of("0123456789.",0,11) == string::npos)
    {
        const string::size_type firstDot = strBuf.find('.',0);
        if (firstDot == string::npos)
        {
            dv.SetIntData(atoi(strBuf.c_str()));
            obj=Token(Token::LEX_INTLIT,dv);
            return true;
        }
        if (strBuf.find('.',firstDot+1) == string::npos)
        {
            dv.SetFloatData(atof(strBuf.c_str()));
            obj=Token(Token::LEX_FLOLIT,dv);
            return true;
        }
        // More than one dot: not a number. Falls through to identifier
        // validation, which rejects it.
    }

    //validate identifier
    if (strBuf.length() > 20)
    {
        dv.SetStrData("Unrecognized lexeme ("+strBuf+"): identifiers can be at most 20 characters long.");
        throw Token(Token::LEX_ERROR,dv);
    }
    for (string::iterator it = strBuf.begin();it != strBuf.end();++it)
    {
        if (it == strBuf.begin() && !isAlpha(*it))
        {
            dv.SetStrData("Unrecognized lexeme ("+strBuf+"): identifiers must begin with a letter.");
            throw Token(Token::LEX_ERROR,dv);
        }
        if (!isAlpha(*it) && *it != '_' && !isNum(*it))
        {
            dv.SetStrData("Unrecognized lexeme ("+strBuf+"): identifiers can only contain underscores and alphanumeric characters.");
            throw Token(Token::LEX_ERROR,dv);
        }
    }

    dv.SetStrData(strBuf);
    obj=Token(Token::LEX_IDENT,dv);
    return true;
}