/**
 * Reads the next token from globalStream, stores it in globalCurrentToken
 * and returns it.
 *
 * Blanks, tabs and newlines are skipped. A ';' starts a comment that runs to
 * the end of the line; comments are skipped as well, so the returned token
 * always has its type set (previously a token with an uninitialized type was
 * returned right after a comment).
 *
 * Token kinds produced:
 *   - TT_EOF    at end of input
 *   - TT_COLON  for ':'
 *   - TT_COMMA  for ','
 *   - TT_NUMBER for an optional '-' followed by decimal digits
 *               (a lone '-' yields the value 0, as before)
 *   - TT_WORD   for anything else: the first character plus all following
 *               alphanumeric characters, accumulated via charbufAppend()
 *
 * NOTE(review): the original also had a TT_SEMICOLON branch, but it sat
 * behind the ';' comment branch and could never run; it is dropped here.
 */
Token nextToken()
{
    Token tok;
    int c;

    /* Skip whitespace and ';' comments until a token start (or EOF) shows up. */
    for (;;) {
        while ((c = currentChar(globalStream)) == ' ' || c == '\t' || c == '\n') {
            nextChar(globalStream);
        }
        if (c != ';') {
            break;
        }

        /* Comment: discard everything up to the end of the line. */
        while ((c = nextChar(globalStream)) != '\n' && c != EOF) {
            /* nothing to do */
        }
        if (c != EOF) {
            nextChar(globalStream); /* step over the terminating newline */
        }
    }

    if (c == EOF) {
        tok.type = TT_EOF;
    } else if (c == ':') {
        tok.type = TT_COLON;
        nextChar(globalStream);
    } else if (c == ',') {
        tok.type = TT_COMMA;
        nextChar(globalStream);
    } else if (c == '-' || isdigit(c)) {
        /* The leading character is either the sign or the first digit. */
        int sign = (c == '-' ? -1 : 1);
        int val = (c == '-' ? 0 : (c - '0'));

        /* nextChar() advances and yields the new current character. */
        while (isdigit(nextChar(globalStream))) {
            val = val * 10 + (currentChar(globalStream) - '0');
        }
        tok.type = TT_NUMBER;
        tok.value._int = sign * val;
    } else {
        int len = 0;

        tok.type = TT_WORD;
        tok.value.str = NULL;
        tok.value.str = charbufAppend(tok.value.str, len++, c);
        while (isalnum(nextChar(globalStream))) {
            tok.value.str = charbufAppend(tok.value.str, len++, currentChar(globalStream));
        }
    }

    globalCurrentToken = tok;
    return tok;
}
/**
 * Lexes one literal fragment of a double-quoted string that may contain
 * "#{...}" interpolations, collecting its text in stringValue_.
 *
 * \param start true if the fragment begins at the opening '"', false if it
 *              resumes after the '}' that closed an interpolation.
 *
 * \return FlowToken::InterpolatedStringFragment when "#{" is reached,
 *         FlowToken::String (start) or FlowToken::InterpolatedStringEnd
 *         (!start) when the closing '"' is reached, FlowToken::Eof if the
 *         input ends first. token_ is updated to the returned value.
 *
 * Escapes \r, \n, \t and \\ are translated; any other escaped character is
 * kept verbatim together with its backslash.
 *
 * Fixes over the previous revision:
 *  - a '#' that is not followed by '{' no longer swallows the character
 *    after it unexamined (e.g. the closing quote in "50#" or the second '#'
 *    in "##{x}");
 *  - a closing quote directly after an escaped backslash ("a\\") now ends
 *    the string; escapes are fully consumed in the backslash branch, so no
 *    "previous character" bookkeeping is needed.
 */
FlowToken FlowLexer::parseInterpolationFragment(bool start) {
  stringValue_.clear();

  // skip either '"' or '}' depending on how we entered
  nextChar();

  for (;;) {
    if (eof()) return token_ = FlowToken::Eof;

    if (currentChar() == '"') {
      // unescaped closing quote: the interpolated string is complete
      nextChar();
      --interpolationDepth_;
      return token_ =
                 start ? FlowToken::String : FlowToken::InterpolatedStringEnd;
    }

    if (currentChar() == '\\') {
      nextChar();  // look at the escaped character
      if (eof()) return token_ = FlowToken::Eof;

      switch (currentChar()) {
        case 'r':
          stringValue_ += '\r';
          break;
        case 'n':
          stringValue_ += '\n';
          break;
        case 't':
          stringValue_ += '\t';
          break;
        case '\\':
          stringValue_ += '\\';
          break;
        default:
          // unknown escape: keep it as written
          stringValue_ += '\\';
          stringValue_ += static_cast<char>(currentChar());
          break;
      }
      nextChar();
      continue;
    }

    if (currentChar() == '#') {
      nextChar();
      if (currentChar() == '{') {
        nextChar();
        return token_ = FlowToken::InterpolatedStringFragment;
      }
      // Plain '#': keep it and re-examine the following character in the
      // next iteration (it may be '"', '\\', '#' or end of input).
      stringValue_ += '#';
      continue;
    }

    stringValue_ += static_cast<char>(currentChar());
    nextChar();
  }
}
// Advances past the remainder of the current line. Characters are consumed
// until a '\n' is the current character; that newline is then consumed too.
// If endOfStream becomes set while skipping, the function returns at once
// without consuming anything further.
void CTokenizerBase::skipToNextLine()
{
    for (;;) {
        if (currentChar() == '\n') {
            nextChar();  // step over the newline itself
            return;
        }
        nextChar();
        if (endOfStream) {
            return;
        }
    }
}
void SegmentedString::advance(unsigned count, UChar* consumedCharacters) { ASSERT_WITH_SECURITY_IMPLICATION(count <= length()); for (unsigned i = 0; i < count; ++i) { consumedCharacters[i] = currentChar(); advance(); } }
// Skips forward until the current character equals `value` or the input is
// exhausted. Returns true if the lexer is not at end of input afterwards.
bool FlowLexer::advanceUntil(char value) {
  for (;;) {
    if (currentChar() == value) break;
    if (eof()) break;
    nextChar();
  }
  return !eof();
}
void KeypadInputMethod::preedit(const QString &str) { if (str.isNull()) { preedit(QString(1, currentChar())); } else { sendPreeditString(str, str.size()); _state = COMPOSING; } }
void KeypadInputMethod::commit(const QString &str) { if (str.isNull()) { commit(QString(1, currentChar())); } else { sendCommitString(str); _state = COMMITTED; } }
// ipv6HexDigit4 *(':' ipv6HexDigit4) ['::' [ipv6HexSeq]] // where the first component, ipv6HexDigit4 is already parsed FlowToken FlowLexer::continueParseIPv6(bool firstComplete) { bool rv = true; if (firstComplete) { while (currentChar() == ':' && peekChar() != ':') { stringValue_ += ':'; nextChar(); if (!ipv6HexDigit4()) return false; } if (currentChar() == ':' && peekChar() == ':') { stringValue_ += "::"; nextChar(); nextChar(); rv = isHexChar() ? ipv6HexSeq() : true; } } else { ipv6HexDigits_ = stringValue_.size(); rv = ipv6HexPart(); } // parse embedded IPv4 remainer while (currentChar_ == '.' && std::isdigit(peekChar())) { stringValue_ += '.'; nextChar(); while (std::isdigit(currentChar_)) { stringValue_ += static_cast<char>(currentChar_); nextChar(); } } if (rv && ipValue_.set(stringValue_.c_str(), IPAddress::V6)) return token_ = FlowToken::IP; else return token_ = FlowToken::Unknown; }
// IPv6_HexPart ::= IPv6_HexSeq # (1) // | IPv6_HexSeq "::" [IPv6_HexSeq] # (2) // | "::" [IPv6_HexSeq] # (3) // bool FlowLexer::ipv6HexPart() { bool rv; if (currentChar() == ':' && peekChar() == ':') { // (3) stringValue_ = "::"; nextChar(); // skip ':' nextChar(); // skip ':' rv = isHexChar() ? ipv6HexSeq() : true; } else if (!!(rv = ipv6HexSeq())) { if (currentChar() == ':' && peekChar() == ':') { // (2) stringValue_ += "::"; nextChar(); // skip ':' nextChar(); // skip ':' rv = isHexChar() ? ipv6HexSeq() : true; } } if (std::isalnum(currentChar_) || currentChar_ == ':') rv = false; return rv; }
// 1*4HEXDIGIT bool FlowLexer::ipv6HexDigit4() { size_t i = ipv6HexDigits_; while (isHexChar()) { stringValue_ += currentChar(); nextChar(); ++i; } ipv6HexDigits_ = 0; return i >= 1 && i <= 4; }
/**
 * Lexes a delimited literal (e.g. a /regex/) into stringValue_.
 *
 * The delimiter is taken from the current character, which must be the
 * opening delimiter. NOTE(review): the `delimiter` parameter is not
 * consulted, exactly as before; it is kept for interface compatibility.
 *
 * The text between the delimiters is stored raw, backslashes included. A
 * delimiter preceded by an unescaped backslash does not end the literal.
 * Unlike the previous revision, an escaped backslash ("\\") directly before
 * the closing delimiter no longer makes that delimiter count as escaped.
 *
 * \return `result` if the closing delimiter was found, otherwise
 *         FlowToken::Unknown (input ended first). token_ is set accordingly.
 */
FlowToken FlowLexer::parseString(char delimiter, FlowToken result) {
  (void)delimiter;  // see note above

  const int delim = currentChar();
  bool escaped = false;  // true iff the previous char was an unescaped '\'

  nextChar();  // skip left delimiter
  stringValue_.clear();

  while (!eof() && (currentChar() != delim || escaped)) {
    stringValue_ += static_cast<char>(currentChar());
    escaped = !escaped && currentChar() == '\\';
    nextChar();
  }

  if (currentChar() == delim) {
    nextChar();  // skip right delimiter
    return token_ = result;
  }

  return token_ = FlowToken::Unknown;
}
/**
 * Creates the token that starts at the current character and lets it build
 * itself from the source.
 *
 * Leading whitespace is skipped first (iteratively; the previous revision
 * recursed once per whitespace character). The token class is then chosen
 * from the first character:
 *   EOF                 -> TerminatorToken
 *   '0'..'9'            -> NumberToken
 *   letter or '_'       -> IdentifierToken
 *   '"'                 -> StringToken
 *   any other character -> SymbolToken
 *
 * The '"' test now comes before the generic symbol test. Previously the
 * symbol test (not alphanumeric and not '_') also matched '"', so the
 * StringToken branch could never be reached.
 *
 * \return the built token, or nullptr for an unexpected first character
 *         (kept as a defensive fallback).
 */
Token::Ptr MutCTokenizer::extractToken ()
{
    // skip space characters
    while (currentChar () != EOF && isspace (currentChar ())) {
        nextChar ();
    }

    Token::Ptr token;

    // terminate
    if (currentChar () == EOF) {
        token = make_shared <TerminatorToken> (__source);
    }
    // number
    else if (currentChar () >= '0' && currentChar () <= '9') {
        token = make_shared <NumberToken> (__source);
    }
    // identifier
    else if (isalpha (currentChar ()) || currentChar () == '_') {
        token = make_shared <IdentifierToken> (__source);
    }
    // string (must be tested before the symbol catch-all below)
    else if (currentChar () == '"') {
        token = make_shared <StringToken> (__source);
    }
    // symbol
    else if (! isalnum (currentChar ()) && currentChar () != '_') {
        token = make_shared <SymbolToken> (__source);
    }
    else {
        // TODO error handling: unexpected beginning character
        return nullptr;
    }

    token->build (__source);
    return token;
}
// 1*4HEXDIGIT *(':' 1*4HEXDIGIT) bool FlowLexer::ipv6HexSeq() { if (!ipv6HexDigit4()) return false; while (currentChar() == ':' && peekChar() != ':') { stringValue_ += ':'; nextChar(); if (!ipv6HexDigit4()) return false; } return true; }
bool CTokenizerBase::nextToken() { string newToken; newToken = ""; skipWhitespace(); if(endOfStream)return false; // Handle quoted strings if(currentChar() == '\"'){ while(1){ nextChar(); if(endOfStream){ //Exception to handle open quotation token = newToken; return true; } if(currentChar() == '\"'){ nextChar(); token = newToken; return true; } if(currentChar() == '\n') newToken += " "; else newToken += currentChar(); } } do { newToken += currentChar(); nextChar(); if(endOfStream)break; } while (currentChar() > 32); token.assign(newToken); return true; }
// Reports whether the current character equals `c`, then advances by one
// character. Note that the lexer advances in either case, match or not.
bool FlowLexer::consume(char c) {
  if (currentChar() == c) {
    nextChar();
    return true;
  }

  nextChar();
  return false;
}
// Returns true when the current character is the EOL marker.
bool ScriptManager::end(void)
{
    if (currentChar() == EOL) {
        return true;
    }
    return false;
}
void NetMessageReader::parse() { while (!isEndOfBuffer()) { if (std::isprint(currentChar())) { TRACE("parse: '%c' (%d) %s", currentChar(), static_cast<int>(state()), state_str()); } else { TRACE("parse: 0x%02X (%d) %s", currentChar(), static_cast<int>(state()), state_str()); } switch (state()) { case MESSAGE_BEGIN: // Syntetic state. Go straight to TYPE. case MESSAGE_TYPE: switch (currentChar()) { case '+': case '-': case ':': currentContext_->type = static_cast<NetMessage::Type>(currentChar()); setState(MESSAGE_LINE_BEGIN); nextChar(); break; case '$': currentContext_->type = NetMessage::String; setState(BULK_BEGIN); break; case '*': currentContext_->type = NetMessage::Array; setState(MESSAGE_NUM_ARGS); nextChar(); break; default: currentContext_->type = NetMessage::Nil; setState(SYNTAX_ERROR); return; } break; case MESSAGE_LINE_BEGIN: if (currentChar() == '\r') { setState(SYNTAX_ERROR); return; } setState(MESSAGE_LINE_OR_CR); begin_ = pos_; nextChar(); break; case MESSAGE_LINE_OR_CR: if (currentChar() == '\n') { setState(SYNTAX_ERROR); return; } if (currentChar() == '\r') setState(MESSAGE_LINE_LF); nextChar(); break; case MESSAGE_LINE_LF: { if (currentChar() != '\n') { setState(SYNTAX_ERROR); return; } BufferRef value = buffer_->ref(begin_, pos_ - begin_ - 1); switch (currentContext_->type) { case NetMessage::Status: currentContext_->message = NetMessage::createStatus(value); break; case NetMessage::Error: currentContext_->message = NetMessage::createError(value); break; case NetMessage::String: currentContext_->message = NetMessage::createString(value); break; case NetMessage::Number: currentContext_->message = NetMessage::createNumber(value.toInt()); break; default: currentContext_->message = NetMessage::createNil(); break; } setState(MESSAGE_END); nextChar(); popContext(); break; } case MESSAGE_NUM_ARGS: { switch (currentChar()) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': currentContext_->number *= 10; 
currentContext_->number += currentChar() - '0'; setState(MESSAGE_NUM_ARGS_OR_CR); nextChar(); break; default: setState(SYNTAX_ERROR); return; } break; } case MESSAGE_NUM_ARGS_OR_CR: switch (currentChar()) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': currentContext_->number *= 10; currentContext_->number += currentChar() - '0'; nextChar(); break; case '\r': setState(MESSAGE_LF); currentContext_->message = NetMessage::createArray(currentContext_->number); nextChar(); break; default: setState(SYNTAX_ERROR); return; } break; case MESSAGE_LF: if (currentChar() != '\n') { setState(SYNTAX_ERROR); return; } nextChar(); if (currentContext_->type == NetMessage::Array) { setState(BULK_BEGIN); pushContext(); } else { setState(MESSAGE_END); popContext(); } break; case BULK_BEGIN: if (currentChar() != '$') { setState(SYNTAX_ERROR); return; } setState(BULK_SIZE); nextChar(); break; case BULK_SIZE: switch (currentChar()) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': argSize_ *= 10; argSize_ += currentChar() - '0'; setState(BULK_SIZE_OR_CR); nextChar(); break; default: setState(SYNTAX_ERROR); return; } break; case BULK_SIZE_OR_CR: switch (currentChar()) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': argSize_ *= 10; argSize_ += currentChar() - '0'; nextChar(); break; case '\r': setState(BULK_SIZE_LF); nextChar(); break; default: setState(SYNTAX_ERROR); return; } break; case BULK_SIZE_LF: if (currentChar() != '\n') { setState(SYNTAX_ERROR); return; } nextChar(); setState(BULK_BODY_OR_CR); begin_ = pos_; break; case BULK_BODY_OR_CR: if (argSize_ > 0) { argSize_ -= nextChar(argSize_); } else if (currentChar() == '\r') { BufferRef value = buffer_->ref(begin_, pos_ - begin_); currentContext_->message = NetMessage::createString(value); nextChar(); setState(BULK_BODY_LF); } else { setState(SYNTAX_ERROR); return; } break; 
case BULK_BODY_LF: if (currentChar() != '\n') { setState(SYNTAX_ERROR); return; } nextChar(); setState(MESSAGE_END); popContext(); break; case MESSAGE_END: // if we reach here, then only because // there's garbage at the end of our message. break; case SYNTAX_ERROR: fprintf(stderr, "NetMessageSocket message syntax error at offset %zi\n", pos_); break; default: break; } } }
// return next token ExtQualModuleNames::tokenType ExtQualModuleNames::scanner() { currentToken_ = ""; if (atEnd()) { return SCANEOF; } while (!atEnd()) { const char cc = returnAdvanceChar(); if (isspace((unsigned char)cc)) { // For VS2003... continue; // do nothing } else if (isalpha(cc)) { // regular identifier currentToken_ += cc; while (isalnum(currentChar()) || currentChar() == '_') { currentToken_ += currentChar(); advanceChar(); } // convert id to internal format (ie, uppercase it) NAString id(currentToken_.c_str()); if (ToInternalIdentifier(id) != 0) { *mxCUMptr << FAIL << DgSqlCode(-2215) << DgString0(currentToken_.c_str()); } currentToken_ = id.data(); return checkReserved(); } currentToken_ += cc; switch (cc) { case '{' : case '}' : case ',' : case '.' : case '=' : return tokenType(cc); case '"': // "delimited identifier" specified by \"([^\"]|"\"\"")*\" while (!atEnd()) { const char c1 = returnAdvanceChar(); currentToken_ += c1; if (c1 == '"') { if (currentChar() == '"') { currentToken_ += currentChar(); } else { // end of delimited identifier // convert delimited id to internal format NAString id(currentToken_.c_str()); if (ToInternalIdentifier(id, FALSE, TRUE) != 0) { *mxCUMptr << FAIL << DgSqlCode(-2209) << DgString0(currentToken_.c_str()); } currentToken_ = id.data(); return ID; } } } *mxCUMptr << FAIL << DgSqlCode(-2210); // unterminated string return ID; default: advanceChar(); *mxCUMptr << FAIL << DgSqlCode(-2211) << DgString0(currentToken_.c_str()); return SCANERROR; } } return SCANEOF; }
// Lexes an identifier-like word starting at the current character.
//
// The word consists of the first character plus any following letters,
// digits, '_' or '.'. Depending on the text and on what follows it, the
// result is an IPv6 literal (handed over to continueParseIPv6), a keyword,
// a boolean literal ("true"/"yes" => 1, "false"/"no" => 0, stored in
// numberValue_) or a plain FlowToken::Ident. The text is left in
// stringValue_.
FlowToken FlowLexer::parseIdent() {
  stringValue_.clear();
  stringValue_ += static_cast<char>(currentChar());
  bool hexOnly = isHexChar();  // stays true while every char is a hex digit
  nextChar();

  for (;;) {
    const bool wordChar = std::isalnum(currentChar()) ||
                          currentChar() == '_' || currentChar() == '.';
    if (!wordChar) break;

    stringValue_ += static_cast<char>(currentChar());
    if (!isHexChar()) hexOnly = false;
    nextChar();
  }

  // ipv6HexDigit4 *(':' ipv6HexDigit4) ['::' [ipv6HexSeq]]
  if (stringValue_.size() <= 4 && hexOnly && currentChar() == ':')
    return continueParseIPv6(true);

  if (stringValue_.size() < 4 && hexOnly && isHexChar())
    return continueParseIPv6(false);

  // reserved words
  static const struct {
    const char* symbol;
    FlowToken token;
  } keywords[] = {{"in", FlowToken::In},
                  {"var", FlowToken::Var},
                  {"on", FlowToken::On},
                  {"do", FlowToken::Do},
                  {"if", FlowToken::If},
                  {"then", FlowToken::Then},
                  {"else", FlowToken::Else},
                  {"unless", FlowToken::Unless},
                  {"import", FlowToken::Import},
                  {"from", FlowToken::From},
                  {"handler", FlowToken::Handler},
                  {"and", FlowToken::And},
                  {"or", FlowToken::Or},
                  {"xor", FlowToken::Xor},
                  {"not", FlowToken::Not},
                  {"bool", FlowToken::BoolType},
                  {"int", FlowToken::IntType},
                  {"string", FlowToken::StringType}};

  for (const auto& keyword : keywords) {
    if (strcmp(keyword.symbol, stringValue_.c_str()) == 0)
      return token_ = keyword.token;
  }

  // boolean literals
  const bool truthy = stringValue_ == "true" || stringValue_ == "yes";
  const bool falsy = !truthy && (stringValue_ == "false" || stringValue_ == "no");
  if (truthy || falsy) {
    numberValue_ = truthy ? 1 : 0;
    return token_ = FlowToken::Boolean;
  }

  return token_ = FlowToken::Ident;
}
// Lexes a token that starts with a decimal digit.
//
// Result:
//  - FlowToken::Number  plain decimal number (value in numberValue_)
//  - FlowToken::IP      dotted-decimal IPv4 address (value in ipValue_), or
//                       an IPv6 literal via continueParseIPv6()
//  - FlowToken::Unknown malformed IPv4 literal
// The token text is left in stringValue_; token_ is set to the result.
//
// Changes over the previous revision:
//  - a missing '.' in an IPv4 literal no longer swallows the offending
//    character (consume() advanced even on mismatch); the lexer now stops
//    in front of it so the next token starts there.
//  - the result of ipValue_.set() is checked, as continueParseIPv6() already
//    does, so text it rejects yields FlowToken::Unknown instead of an IP
//    token.
FlowToken FlowLexer::parseNumber() {
  stringValue_.clear();
  numberValue_ = 0;

  while (std::isdigit(currentChar())) {
    numberValue_ *= 10;
    numberValue_ += currentChar() - '0';
    stringValue_ += static_cast<char>(currentChar());
    nextChar();
  }

  // ipv6HexDigit4 *(':' ipv6HexDigit4) ['::' [ipv6HexSeq]]
  if (stringValue_.size() <= 4 && currentChar() == ':')
    return continueParseIPv6(true);

  if (stringValue_.size() < 4 && isHexChar())
    return continueParseIPv6(false);

  if (currentChar() != '.') return token_ = FlowToken::Number;

  // 2nd, 3rd and 4th IP component: each is '.' followed by digits
  for (int component = 2; component <= 4; ++component) {
    if (currentChar() != '.') return token_ = FlowToken::Unknown;

    stringValue_ += '.';
    nextChar();

    while (std::isdigit(currentChar())) {
      stringValue_ += static_cast<char>(currentChar());
      nextChar();
    }
  }

  if (!ipValue_.set(stringValue_.c_str(), IPAddress::V4))
    return token_ = FlowToken::Unknown;

  return token_ = FlowToken::IP;
}
// Moves the read index one position back and returns the character that is
// current afterwards.
char ScriptManager::back(void)
{
    this->idx -= 1;
    const char ch = currentChar();
    return ch;
}
/**
 * Lexes the next token.
 *
 * Whitespace and comments are skipped first (consumeSpace()); then the
 * current character selects the token. Multi-character operators are
 * resolved by looking at the characters that follow. The token is stored in
 * token_ and returned; source positions (lastPos_, lastLocation_,
 * currLocation_.begin) are updated on the way.
 *
 * A '/' is lexed as the start of a regular expression when the previous
 * token was an identifier or an operator, and as the division operator
 * otherwise.
 *
 * Fix over the previous revision: for "**" the second '*' is now consumed
 * with nextChar(). It used to call nextToken() recursively, which lexed and
 * discarded the token following the operator.
 */
FlowToken FlowLexer::nextToken() {
  // must be computed before token_ is overwritten below
  bool expectsValue = token() == FlowToken::Ident ||
                      FlowTokenTraits::isOperator(token());

  lastPos_ = currPos_;

  if (consumeSpace()) return token_ = FlowToken::Eof;

  // printf("FlowLexer.nextToken: currentChar %s curr[%zu:%zu.%zu] next[%zu:%zu.%zu]\n",
  //        escape(currentChar_).c_str(),
  //        currPos_.line, currPos_.column, currPos_.offset,
  //        nextPos_.line, nextPos_.column, nextPos_.offset);

  content_.clear();
  content_ += static_cast<char>(currentChar_);
  lastLocation_ = currLocation_;
  currLocation_.begin = currPos_;

  switch (currentChar_) {
    case EOF:  // (-1)
      return token_ = FlowToken::Eof;
    case '=':
      switch (nextChar()) {
        case '=':
          nextChar();
          return token_ = FlowToken::Equal;
        case '^':
          nextChar();
          return token_ = FlowToken::PrefixMatch;
        case '$':
          nextChar();
          return token_ = FlowToken::SuffixMatch;
        case '~':
          nextChar();
          return token_ = FlowToken::RegexMatch;
        case '>':
          nextChar();
          return token_ = FlowToken::HashRocket;
        default:
          return token_ = FlowToken::Assign;
      }
    case '<':
      switch (nextChar()) {
        case '<':
          nextChar();
          return token_ = FlowToken::Shl;
        case '=':
          nextChar();
          return token_ = FlowToken::LessOrEqual;
        default:
          return token_ = FlowToken::Less;
      }
    case '>':
      switch (nextChar()) {
        case '>':
          nextChar();
          return token_ = FlowToken::Shr;
        case '=':
          nextChar();
          return token_ = FlowToken::GreaterOrEqual;
        default:
          return token_ = FlowToken::Greater;
      }
    case '|':
      switch (nextChar()) {
        case '|':
          nextChar();
          return token_ = FlowToken::Or;
        case '=':
          nextChar();
          return token_ = FlowToken::OrAssign;
        default:
          return token_ = FlowToken::BitOr;
      }
    case '&':
      switch (nextChar()) {
        case '&':
          nextChar();
          return token_ = FlowToken::And;
        case '=':
          nextChar();
          return token_ = FlowToken::AndAssign;
        default:
          return token_ = FlowToken::BitAnd;
      }
    case '.':
      if (nextChar() == '.') {
        if (nextChar() == '.') {
          nextChar();
          return token_ = FlowToken::Ellipsis;
        }
        return token_ = FlowToken::DblPeriod;
      }
      return token_ = FlowToken::Period;
    case ':':
      if (peekChar() == ':') {
        // "::" can only be the start of an IPv6 literal
        stringValue_.clear();
        return continueParseIPv6(false);
      } else {
        nextChar();
        return token_ = FlowToken::Colon;
      }
    case ';':
      nextChar();
      return token_ = FlowToken::Semicolon;
    case ',':
      nextChar();
      return token_ = FlowToken::Comma;
    case '{':
      nextChar();
      return token_ = FlowToken::Begin;
    case '}':
      if (interpolationDepth_) {
        // closes a "#{...}" block: continue with the string's next fragment
        return token_ = parseInterpolationFragment(false);
      } else {
        nextChar();
        return token_ = FlowToken::End;
      }
    case '(':
      nextChar();
      return token_ = FlowToken::RndOpen;
    case ')':
      nextChar();
      return token_ = FlowToken::RndClose;
    case '[':
      nextChar();
      return token_ = FlowToken::BrOpen;
    case ']':
      nextChar();
      return token_ = FlowToken::BrClose;
    case '+':
      nextChar();
      return token_ = FlowToken::Plus;
    case '-':
      nextChar();
      return token_ = FlowToken::Minus;
    case '*':
      switch (nextChar()) {
        case '*':
          nextChar();  // consume the second '*' only
          return token_ = FlowToken::Pow;
        default:
          return token_ = FlowToken::Mul;
      }
    case '/':
      if (expectsValue) return token_ = parseString('/', FlowToken::RegExp);

      nextChar();
      return token_ = FlowToken::Div;
    case '%':
      nextChar();
      return token_ = FlowToken::Mod;
    case '!':
      switch (nextChar()) {
        case '=':
          nextChar();
          return token_ = FlowToken::UnEqual;
        default:
          return token_ = FlowToken::Not;
      }
    case '\'':
      return token_ = parseString(true);
    case '"':
      ++interpolationDepth_;
      return token_ = parseInterpolationFragment(true);
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
      return parseNumber();
    default:
      if (std::isalpha(currentChar()) || currentChar() == '_')
        return token_ = parseIdent();

      if (std::isprint(currentChar()))
        printf("lexer: unknown char %c (0x%02X)\n", currentChar(), currentChar());
      else
        printf("lexer: unknown char %u (0x%02X)\n", currentChar() & 0xFF, currentChar() & 0xFF);

      nextChar();
      return token_ = FlowToken::Unknown;
  }
}
// Skips every character whose code is that of a blank (' ') or lower.
// Returns as soon as a character above ' ' is current, or when endOfStream
// becomes set while skipping.
void CTokenizerBase::skipWhitespace()
{
    for (;;) {
        if (!(currentChar() <= ' ')) {
            return;
        }
        nextChar();
        if (endOfStream) {
            return;
        }
    }
}
// Moves the read index one position forward and returns the character that
// is current afterwards.
char ScriptManager::next(void)
{
    this->idx += 1;
    const char ch = currentChar();
    return ch;
}
/** * \retval true abort tokenizing in caller * \retval false continue tokenizing in caller */ bool FlowLexer::consumeSpace() { // skip spaces for (;; nextChar()) { if (eof()) return true; if (std::isspace(currentChar_)) continue; if (std::isprint(currentChar_)) break; // TODO proper error reporting through API callback std::fprintf(stderr, "%s[%04zu:%02zu]: invalid byte %d (0x%02X)\n", currLocation_.fileName.c_str(), nextPos_.line, nextPos_.column, currentChar() & 0xFF, currentChar() & 0xFF); } if (eof()) return true; if (currentChar() == '#') { // skip chars until EOL for (;;) { if (eof()) { token_ = FlowToken::Eof; return true; } if (currentChar() == '\n') { nextChar(); return consumeSpace(); } nextChar(); } } if (currentChar() == '/' && peekChar() == '*') { // "/*" ... "*/" // parse multiline comment nextChar(); for (;;) { if (eof()) { token_ = FlowToken::Eof; // reportError(Error::UnexpectedEof); return true; } if (currentChar() == '*' && peekChar() == '/') { nextChar(); // skip '*' nextChar(); // skip '/' break; } nextChar(); } return consumeSpace(); } return false; }
// Moves the read index `progress` positions forward and returns the
// character that is current afterwards.
char ScriptManager::forward(size_t progress)
{
    this->idx += progress;
    const char ch = currentChar();
    return ch;
}