// Fetch the next token into t.
//
// When the input file has no valid descriptor, t is simply marked as
// Token::eof. Otherwise the raw scanner token2() is run, and the file is
// closed as soon as it reports Token::eof. The resulting type is remembered
// in lasttokentype. In embedded mode, tokens naming disabled features are
// reported through error() and replaced by Token::error (lasttokentype keeps
// the original type). With a verbose level above 8 every token is traced.
void Lexer::token(Token &t)
{
	if (file.fd() >= 0)
	{
		token2(t);
		if (t.Type() == Token::eof)
			file.Close();
	}
	else
	{
		t.Type( Token::eof);
	}

	lasttokentype=t.Type();

	if (maildrop.embedded_mode)
	{
		// Keywords that are not permitted in embedded mode.
		const bool disabled=
			lasttokentype == Token::tokento ||
			lasttokentype == Token::tokencc ||
			lasttokentype == Token::btstring ||
			lasttokentype == Token::tokenxfilter ||
			lasttokentype == Token::dotlock ||
			lasttokentype == Token::flock ||
			lasttokentype == Token::logfile ||
			lasttokentype == Token::log;

		if (disabled)
		{
			Buffer	complaint;

			complaint="maildrop: '";
			complaint += t.Name();
			complaint += "' disabled in embedded mode.\n";
			complaint += '\0';	// terminate before handing out as a C string
			error((const char *)complaint);
			t.Type( Token::error );
		}
	}

	if (VerboseLevel() > 8)
	{
		Buffer	trace;

		trace="Tokenized ";
		trace += t.Name();
		trace += '\n';
		trace += '\0';	// terminate before handing out as a C string
		error((const char *)trace);
	}
}
// ------------------------------------------------------------------------------------------------ int64_t ParseTokenAsInt64(const Token& t, const char*& err_out) { err_out = NULL; if (t.Type() != TokenType_DATA) { err_out = "expected TOK_DATA token"; return 0L; } if (t.IsBinary()) { const char* data = t.begin(); if (data[0] != 'L') { err_out = "failed to parse Int64, unexpected data type"; return 0L; } BE_NCONST int64_t id = SafeParse<int64_t>(data + 1, t.end()); AI_SWAP8(id); return id; } // XXX: should use size_t here unsigned int length = static_cast<unsigned int>(t.end() - t.begin()); ai_assert(length > 0); const char* out; const int64_t id = strtol10_64(t.begin(), &out, &length); if (out > t.end()) { err_out = "failed to parse Int64 (text)"; return 0L; } return id; }
// ------------------------------------------------------------------------------------------------ int ParseTokenAsInt(const Token& t, const char*& err_out) { err_out = NULL; if (t.Type() != TokenType_DATA) { err_out = "expected TOK_DATA token"; return 0; } if(t.IsBinary()) { const char* data = t.begin(); if (data[0] != 'I') { err_out = "failed to parse I(nt), unexpected data type (binary)"; return 0; } BE_NCONST int32_t ival = SafeParse<int32_t>(data+1, t.end()); AI_SWAP4(ival); return static_cast<int>(ival); } ai_assert(static_cast<size_t>(t.end() - t.begin()) > 0); const char* out; const int intval = strtol10(t.begin(),&out); if (out != t.end()) { err_out = "failed to parse ID"; return 0; } return intval; }
bool Tokenizer::Check(const TokenType aTokenType, Token& aResult) { if (!HasInput()) { mHasFailed = true; return false; } nsACString::const_char_iterator next = Parse(aResult); if (aTokenType != aResult.Type()) { mHasFailed = true; return false; } mRollback = mCursor; mCursor = next; aResult.AssignFragment(mRollback, mCursor); mPastEof = aResult.Type() == TOKEN_EOF; mHasFailed = false; return true; }
// Interactive driver: reads one line from standard input, feeds it to a
// Lexer and prints every token it produces, stopping after the end-of-input
// token has been printed.
int main()
{
    cout << "please input some text, both word and number are ok" << endl;
    cout << ">";

    string input;
    getline(cin,input);

    Lexer lexer(input);

    // NOTE(review): `TokenType._EOF` only compiles if `TokenType` names an
    // object rather than a type — confirm against its declaration.
    for (bool finished = false; !finished; ) {
        Token t = lexer.NextToken();
        cout << t;
        if (t.Type()==TokenType._EOF)
            finished = true;
    }

    return 0;
}
// Equality of two tokens.
//
// Tokens of different types are never equal. For CharacterString, Integer
// and FloatingPoint tokens the carried values must also compare equal with
// `==`; tokens of any other type are equal on type alone.
bool Token::operator==(Token &ref) const
{
    if (Type() != ref.Type())
        return false;

    switch (Type())
    {
        case CharacterString:
            // Earlier revisions compared the text with strcmp() and then with
            // a hand-written byte loop; both were replaced by this direct
            // comparison.
            // NOTE(review): this is a value comparison only if String()
            // returns a string object; if it returns a raw `const char *`
            // it compares addresses — confirm against the declaration.
            return String() == ref.String();

        case Integer:
            return Int() == ref.Int();

        case FloatingPoint:
            return Float() == ref.Float();

        default:
            // No payload to compare.
            return true;
    }
}
// Unconditionally consume the next token into aToken.
//
// Returns false (and raises the failure flag) when there is no input left.
// Otherwise the cursor advances past the parsed token, the previous cursor
// is kept in mRollback, aToken is given the consumed fragment, and true is
// returned.
bool Tokenizer::Next(Token& aToken) {
  const bool inputAvailable = HasInput();

  if (inputAvailable) {
    mRollback = mCursor;
    mCursor = Parse(aToken);

    aToken.AssignFragment(mRollback, mCursor);
    mPastEof = aToken.Type() == TOKEN_EOF;
  }

  mHasFailed = !inputAvailable;
  return inputAvailable;
}
// ------------------------------------------------------------------------------------------------ size_t ParseTokenAsDim(const Token& t, const char*& err_out) { // same as ID parsing, except there is a trailing asterisk err_out = NULL; if (t.Type() != TokenType_DATA) { err_out = "expected TOK_DATA token"; return 0; } if(t.IsBinary()) { const char* data = t.begin(); if (data[0] != 'L') { err_out = "failed to parse ID, unexpected data type, expected L(ong) (binary)"; return 0; } BE_NCONST uint64_t id = SafeParse<uint64_t>(data+1, t.end()); AI_SWAP8(id); return static_cast<size_t>(id); } if(*t.begin() != '*') { err_out = "expected asterisk before array dimension"; return 0; } // XXX: should use size_t here unsigned int length = static_cast<unsigned int>(t.end() - t.begin()); if(length == 0) { err_out = "expected valid integer number after asterisk"; return 0; } const char* out; const size_t id = static_cast<size_t>(strtoul10_64(t.begin() + 1,&out,&length)); if (out > t.end()) { err_out = "failed to parse ID"; return 0; } return id; }
// ------------------------------------------------------------------------------------------------ std::string ParseTokenAsString(const Token& t, const char*& err_out) { err_out = NULL; if (t.Type() != TokenType_DATA) { err_out = "expected TOK_DATA token"; return ""; } if(t.IsBinary()) { const char* data = t.begin(); if (data[0] != 'S') { err_out = "failed to parse S(tring), unexpected data type (binary)"; return ""; } ai_assert(t.end() - data >= 5); // read string length BE_NCONST int32_t len = *reinterpret_cast<const int32_t*>(data+1); AI_SWAP4(len); ai_assert(t.end() - data == 5 + len); return std::string(data + 5, len); } const size_t length = static_cast<size_t>(t.end() - t.begin()); if(length < 2) { err_out = "token is too short to hold a string"; return ""; } const char* s = t.begin(), *e = t.end() - 1; if (*s != '\"' || *e != '\"') { err_out = "expected double quoted string"; return ""; } return std::string(s+1,length-2); }
bool Tokenizer::Check(const Token& aToken) { if (!HasInput()) { mHasFailed = true; return false; } Token parsed; nsACString::const_char_iterator next = Parse(parsed); if (!aToken.Equals(parsed)) { mHasFailed = true; return false; } mRollback = mCursor; mCursor = next; mPastEof = parsed.Type() == TOKEN_EOF; mHasFailed = false; return true; }
// ------------------------------------------------------------------------------------------------ float ParseTokenAsFloat(const Token& t, const char*& err_out) { err_out = NULL; if (t.Type() != TokenType_DATA) { err_out = "expected TOK_DATA token"; return 0.0f; } if(t.IsBinary()) { const char* data = t.begin(); if (data[0] != 'F' && data[0] != 'D') { err_out = "failed to parse F(loat) or D(ouble), unexpected data type (binary)"; return 0.0f; } if (data[0] == 'F') { ai_assert(t.end() - data == 5); // no byte swapping needed for ieee floats return *reinterpret_cast<const float*>(data+1); } else { ai_assert(t.end() - data == 9); // no byte swapping needed for ieee floats return static_cast<float>(*reinterpret_cast<const double*>(data+1)); } } // need to copy the input string to a temporary buffer // first - next in the fbx token stream comes ',', // which fast_atof could interpret as decimal point. #define MAX_FLOAT_LENGTH 31 char temp[MAX_FLOAT_LENGTH + 1]; const size_t length = static_cast<size_t>(t.end()-t.begin()); std::copy(t.begin(),t.end(),temp); temp[std::min(static_cast<size_t>(MAX_FLOAT_LENGTH),length)] = '\0'; return fast_atof(temp); }
// Raw scanner: read one token from the input into t.
//
// t starts out as Token::error and keeps that type whenever the function
// returns without recognising a token, so malformed input reaches the caller
// as an error token. A negative value from curchar() is treated as end of
// input. In order, the scanner handles:
//   1. whitespace, '#' comments and backslash line continuations (a newline
//      or carriage return yields Token::semicolon);
//   2. end of input (a final Token::semicolon is emitted first unless the
//      previous token already was one, then Token::eof);
//   3. strings quoted with ", ' or `;
//   4. /regular expression/ patterns with an optional ":flags" suffix;
//   5. unquoted words, which are mapped to keywords where they match;
//   6. punctuation and operators.
// The token text, where there is any, is collected in t.String().
void Lexer::token2(Token &t)
{
int	c;

	t.Type(Token::error);

	// Eat whitespace & comments

	for (;;)
	{
		while ((c=curchar()) >= 0 && isspace(c))
		{
			nextchar();
			if (c == '\n' || c == '\r')	// Treat as semicolon
			{
				t.Type(Token::semicolon);
				return;
			}
		}
		if (c == '\\')	// Continued line?
		{
			nextchar();
			c=curchar();
			if (c < 0 || !isspace(c))
			{
				return;	// Error
			}
			// Skip the rest of the line, including its newline, and
			// go on scanning as if the line had not ended.
			while (c >= 0 && c != '\n')
			{
				nextchar();
				c=curchar();
			}
			if (c == '\n')	nextchar();
			continue;
		}
		if (c != '#')	break;
		// Comment: discard everything up to the end of the line. The
		// terminating newline still counts as a statement separator.
		while ( (c=nextchar()) >= 0 && c != '\n')
			;
		if (c == '\n')
		{
			t.Type(Token::semicolon);
			return;
		}
	}

	if (c < 0)
	{
		// End of input: make sure the last statement is terminated by a
		// semicolon before reporting eof.
		t.Type(lasttokentype == Token::semicolon ? Token::eof
			: Token::semicolon);
		return;
	}

	// String, quoted by ", ', or `

	Buffer &pattern=t.String();
	pattern.reset();

	if (c == '\'' || c == '"' || c == '`')
	{
	Token::tokentype ttype=Token::qstring;
	int quote_char=c;

		if (c == '\'')	ttype=Token::sqstring;
		if (c == '`')	ttype=Token::btstring;

		nextchar();

	int q;

		// Grab string until matching close is found.

		while ((q=curchar()) != c)
		{
			if (q < 0 || q == '\n' || q == '\r')
			{
				// Shared error exit, also reached by the gotos
				// further down in this loop.
missquote:
				error("maildrop: Missing ', \", or `.\n");
				return;
			}

			// Backslash escape

			if (q != '\\')
			{
				nextchar();
				pattern.push(q);
				continue;
			}
			nextchar();

			// Look what's after the backslash.
			// If it's whitespace, we may have a continuation
			// on the next line.

		int qq=curchar();

			if (qq < 0)	goto missquote;
			if (!isspace(qq) && qq != '\r' && qq != '\n')
			{
				// The backslash is dropped only in front of
				// the quote character or another backslash.
				if (qq != quote_char && qq != '\\')
					pattern.push('\\');
				pattern.push(qq);
				nextchar();
				continue;
			}

			// If it's not a continuation, we need to dutifully
			// save the characters as the string.  So, save the
			// current length of the string, and backtrack if
			// necessary.

		int l=pattern.Length();

			pattern.push('\\');

			// Collect all whitespace after the backslash,
			// not including newline characters.

			while ((q=curchar()) >= 0 && isspace(q) &&
				q != '\r' && q != '\n')
			{
				pattern.push(q);
				nextchar();
			}
			if (q < 0)	goto missquote;

			// If the next character is a newline char, or
			// a comment, we have a continuation.

			if (q != '#' && q != '\r' && q != '\n')	continue;

			pattern.Length(l);	// Discard padding
			while (q != '\n')
			{
				if (q < 0)	goto missquote;
				nextchar();
				q=curchar();
			}

			// Discard all whitespace at the beginning of the
			// next line.

			nextchar();
			while ( (q=curchar()) >= 0 && isspace(q))
				nextchar();
			if (q < 0)	goto missquote;
		}
		nextchar();	// Skip the closing quote
		t.Type(ttype);
		return;
	}

	// A pattern - "/", then arbitrary text, terminated by "/"
	// (not attempted directly after '=', "to" or "cc")

	if (c == '/' && lasttokentype != Token::equals &&
		lasttokentype != Token::tokento &&
		lasttokentype != Token::tokencc)
	{
		pattern.push(c);
		nextchar();
		c=curchar();
		// A slash followed by whitespace or end of input is the
		// division operator, not the start of a pattern.
		if (c == '\r' || c == '\n' || c < 0 || isspace(c))
		{
			t.Type(Token::divi);
			return;
		}

		while ( (c=curchar()) != '/')
		{
			if (c < 0 || c == '\r' || c == '\n')
				return;	// Error token - let parser throw
					// an error
			if (c == '\\')
			{
				// Keep the backslash and take the following
				// character verbatim, even if it is a slash.
				pattern.push(c);
				nextchar();
				c=curchar();
				if (c < 0 || c == '\r' || c == '\n')
					return;
			}
			pattern.push(c);
			nextchar();
		}
		pattern.push(c);
		nextchar();
		// Optional ":flags" suffix directly after the closing slash.
		if ((c=curchar()) == ':')
		{
			pattern.push(c);
			nextchar();
			while ( (c=curchar()) >= 0 && (isalnum(c) ||
				c == '-' || c == '+' || c == '.' || c == ','))
			{
				pattern.push(c);
				nextchar();
			}
		}
		t.Type(Token::regexpr);
		return;
	}

// Letters, digits, -, ., :, /, can be in an unquoted string

#define	ISUNQSTRING(x)	(x >= 0 && (isalnum(x) || (x) == '_' || x == '-' || \
	(x) == '@' || (x) == '.' || x == ':' || x == SLASH_CHAR || x == '$' || \
	x == '{' || x == '}'))

// Unquoted string may not begin with {}

#define	ISLUNQSTRING(x)	(x >= 0 && (isalnum(x) || (x) == '_' || x == '-' || \
	(x) == '@' || (x) == '.' || x == ':' || x == SLASH_CHAR || x == '$'))

	if (ISLUNQSTRING(c))
	{
		do
		{
			nextchar();
			pattern.push(c);
			c=curchar();
		} while ( ISUNQSTRING(c) );

		// Skip trailing whitespace, but stay on the current line.
		while ( c >= 0 && isspace(c) && c != '\r' && c != '\n')
		{
			nextchar();
			c=curchar();
		}

		if (pattern.Length() == 2)
		{
			// Pack both characters into one integer so that the
			// two-letter keywords can be matched with a switch.
		int n= ((int)(unsigned char)*(const char *)pattern) << 8
			| (unsigned char)((const char *)pattern)[1];

			switch (n)	{
			case (('l' << 8) | 't'):
				t.Type(Token::slt);
				return;
			case (('l' << 8) | 'e'):
				t.Type(Token::sle);
				return;
			case (('g' << 8) | 't'):
				t.Type(Token::sgt);
				return;
			case (('g' << 8) | 'e'):
				t.Type(Token::sge);
				return;
			case (('e' << 8) | 'q'):
				t.Type(Token::seq);
				return;
			case (('n' << 8) | 'e'):
				t.Type(Token::sne);
				return;
			case (('t' << 8) | 'o'):
				t.Type(Token::tokento);
				return;
			case (('c' << 8) | 'c'):
				t.Type(Token::tokencc);
				return;
			}
		}

		// Longer keywords; anything else is a plain (unquoted) string.
		if (pattern == "length")
			t.Type(Token::length);
		else if (pattern == "substr")
			t.Type(Token::substr);
		else if (pattern == "if")
			t.Type(Token::tokenif);
		else if (pattern == "elsif")
			t.Type(Token::tokenelsif);
		else if (pattern == "else")
			t.Type(Token::tokenelse);
		else if (pattern == "while")
			t.Type(Token::tokenwhile);
		else if (pattern == "exception")
			t.Type(Token::exception);
		else if (pattern == "echo")
			t.Type(Token::echo);
		else if (pattern == "xfilter")
			t.Type(Token::tokenxfilter);
		else if (pattern == "dotlock")
			t.Type(Token::dotlock);
		else if (pattern == "flock")
			t.Type(Token::flock);
		else if (pattern == "logfile")
			t.Type(Token::logfile);
		else if (pattern == "log")
			t.Type(Token::log);
		else if (pattern == "include")
			t.Type(Token::include);
		else if (pattern == "exit")
			t.Type(Token::exit);
		else if (pattern == "foreach")
			t.Type(Token::foreach);
		else if (pattern == "getaddr")
			t.Type(Token::getaddr);
		else if (pattern == "lookup")
			t.Type(Token::lookup);
		else if (pattern == "escape")
			t.Type(Token::escape);
		else if (pattern == "tolower")
			t.Type(Token::to_lower);
		else if (pattern == "toupper")
			t.Type(Token::to_upper);
		else if (pattern == "hasaddr")
			t.Type(Token::hasaddr);
		else if (pattern == "gdbmopen")
			t.Type(Token::gdbmopen);
		else if (pattern == "gdbmclose")
			t.Type(Token::gdbmclose);
		else if (pattern == "gdbmfetch")
			t.Type(Token::gdbmfetch);
		else if (pattern == "gdbmstore")
			t.Type(Token::gdbmstore);
		else if (pattern == "time")
			t.Type(Token::timetoken);
		else if (pattern == "import")
			t.Type(Token::importtoken);
		else if (pattern == "-")	// Hack
			t.Type(Token::minus);
		else if (pattern == "unset")
			t.Type(Token::unset);
		else
			t.Type(Token::qstring);
		return;
	}

	// Punctuation and operators. Two-character operators are recognised
	// by peeking at the character after the first one.
	switch (c)	{
	case '&':
		nextchar();
		if ( curchar() == '&')
		{
			t.Type(Token::land);
			nextchar();
			return;
		}
		t.Type(Token::band);
		return;
	case '|':
		nextchar();
		if ( curchar() == '|')
		{
			t.Type(Token::lor);
			nextchar();
			return;
		}
		t.Type(Token::bor);
		return;
	case '{':
		t.Type(Token::lbrace);
		nextchar();
		return;
	case '}':
		t.Type(Token::rbrace);
		nextchar();
		return;
	case '(':
		t.Type(Token::lparen);
		nextchar();
		return;
	case ')':
		t.Type(Token::rparen);
		nextchar();
		return;
	case ';':
		t.Type(Token::semicolon);
		nextchar();
		return;
	case '+':
		t.Type(Token::plus);
		nextchar();
		return;
	case '*':
		t.Type(Token::mult);
		nextchar();
		return;
	case '~':
		t.Type(Token::bitwisenot);
		nextchar();
		return;
	case '<':
		nextchar();
		if ( curchar() == '=')
		{
			nextchar();
			t.Type(Token::le);
			return;
		}
		t.Type(Token::lt);
		return;
	case '>':
		nextchar();
		if ( curchar() == '=')
		{
			nextchar();
			t.Type(Token::ge);
			return;
		}
		t.Type(Token::gt);
		return;
	case '=':
		nextchar();
		if ( curchar() == '~')
		{
			nextchar();
			t.Type(Token::strregexp);
			return;
		}
		if ( curchar() != '=')
		{
			t.Type(Token::equals);
			return;
		}
		nextchar();
		t.Type(Token::eq);
		return;
	case '!':
		nextchar();
		if ( curchar() != '=')
		{
			t.Type(Token::logicalnot);
			return;
		}
		nextchar();
		t.Type(Token::ne);
		return;
	case ',':
		nextchar();
		t.Type(Token::comma);
		return;
	}
	// Unrecognised character: consume it so scanning can continue; t is
	// still Token::error at this point.
	nextchar();
	// Let the parser throw an error.
}
// Compare a bare token type against a token: true when the token's type is
// exactly lhs.
bool tok::operator==(TokenType lhs, Token &rhs)
{
    const TokenType actual = rhs.Type();
    return lhs == actual;
}
// Compare a token against a bare token type: true when the token's type is
// exactly rhs.
bool tok::operator==(Token &lhs, TokenType rhs)
{
    const TokenType actual = lhs.Type();
    return actual == rhs;
}