// Scans a single token from m_data starting at m_nextPos.
//
// Leading whitespace is skipped first. Token(0) is returned once m_nextPos has
// reached the end of m_data, and Token(XPATH_ERROR) is returned for input that
// cannot be tokenized (a lone '!', a failed name scan, or "::" after a name
// that parseAxisName() rejects).
inline Parser::Token Parser::nextTokenInternal()
{
    skipWS();

    if (m_nextPos >= m_data.length())
        return Token(0);

    const char ch = peekCurHelper();

    // Every decimal digit starts a number.
    if (ch >= '0' && ch <= '9')
        return lexNumber();

    switch (ch) {
    // Single-character tokens whose token code is the character itself.
    case '(':
    case ')':
    case '[':
    case ']':
    case '@':
    case ',':
    case '|':
        return makeTokenAndAdvance(ch);

    case '\'':
    case '\"':
        return lexString();

    case '.': {
        // "..", a number with a leading '.', or a plain '.'.
        const char lookahead = peekAheadHelper();
        if (lookahead == '.')
            return makeTokenAndAdvance(DOTDOT, 2);
        const bool digitFollows = lookahead >= '0' && lookahead <= '9';
        if (!digitFollows)
            return makeTokenAndAdvance('.');
        return lexNumber();
    }

    case '/':
        if (peekAheadHelper() != '/')
            return makeTokenAndAdvance('/');
        return makeTokenAndAdvance(SLASHSLASH, 2);

    case '+':
        return makeTokenAndAdvance(PLUS);

    case '-':
        return makeTokenAndAdvance(MINUS);

    case '=':
        return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);

    case '!':
        // '!' is only accepted as the first half of "!=".
        if (peekAheadHelper() != '=')
            return Token(XPATH_ERROR);
        return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);

    case '<':
        if (peekAheadHelper() != '=')
            return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
        return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);

    case '>':
        if (peekAheadHelper() != '=')
            return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
        return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);

    case '*':
        // '*' is the multiply operator in a binary-operator context and a
        // wildcard name test everywhere else.
        if (!isBinaryOperatorContext()) {
            ++m_nextPos;
            return Token(NAMETEST, "*");
        }
        return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);

    case '$': {
        // '$' followed by a QName.
        ++m_nextPos;
        String variableName;
        if (!lexQName(variableName))
            return Token(XPATH_ERROR);
        return Token(VARIABLEREFERENCE, variableName);
    }
    }

    // Anything else has to begin with an NCName.
    String ident;
    if (!lexNCName(ident))
        return Token(XPATH_ERROR);

    skipWS();

    // In a binary-operator context the name may be one of the operator words.
    if (isBinaryOperatorContext()) {
        if (ident == "and")
            return Token(AND);
        if (ident == "or")
            return Token(OR);
        if (ident == "mod")
            return Token(MULOP, NumericOp::OP_Mod);
        if (ident == "div")
            return Token(MULOP, NumericOp::OP_Div);
    }

    if (peekCurHelper() == ':') {
        ++m_nextPos;

        // "name::" is only valid when the name is an axis name.
        if (peekCurHelper() == ':') {
            ++m_nextPos;
            Step::Axis axis;
            if (!parseAxisName(ident, axis))
                return Token(XPATH_ERROR);
            return Token(AXISNAME, axis);
        }

        // Otherwise it is either the "prefix:*" name test or a qualified name.
        skipWS();
        if (peekCurHelper() == '*') {
            ++m_nextPos;
            return Token(NAMETEST, ident + ":*");
        }

        String localPart;
        if (!lexNCName(localPart))
            return Token(XPATH_ERROR);

        ident = ident + ":" + localPart;
    }

    skipWS();

    // Without a following '(' the name can only be a name test.
    if (peekCurHelper() != '(')
        return Token(NAMETEST, ident);

    // The '(' is deliberately left unconsumed. The name is either a node type
    // or a function name.
    if (ident == "processing-instruction")
        return Token(PI);
    if (ident == "node")
        return Token(NODE);
    if (ident == "text")
        return Token(TEXT);
    if (ident == "comment")
        return Token(COMMENT);

    return Token(FUNCTIONNAME, ident);
}
/*
 * Scan the next symbol from the lexer's buffer.
 *
 * On return l->sym holds the symbol kind (also the return value) and, for the
 * branches handled directly here, l->text holds its spelling; l->symc is set
 * by most but not all branches (the separator branch leaves it untouched).
 * If a symbol was already scanned ahead (l->peekDone), that symbol is promoted
 * to the current one and no input is consumed.
 *
 * Characters are cast to unsigned char before being handed to the <ctype.h>
 * classifiers: passing a negative plain char (any byte >= 0x80 where char is
 * signed) to isalpha()/isdigit()/isblank() is undefined behaviour.
 *
 * NOTE(review): the identifier, keyword and separator branches copy into
 * l->text with no bounds check; the size of l->text is not visible here, so
 * confirm it cannot be overrun by long input.
 */
Symbol Lexer_get_sym(Lexer* l) {
    /* A peeked symbol is pending: make it the current symbol. */
    if (l->peekDone) {
        l->peekDone = false;
        l->sym = l->nextSym;
        l->symc = l->nextSymc;
        strcpy(l->text, l->nextText);
        return l->sym;
    }

    /*
     * Skip whitespace and comments, refilling the buffer when it is
     * exhausted. The loop repeats while more skipping may be needed ('"' is
     * presumably the comment opener -- confirm against skipComment) and the
     * input file is still set.
     */
    do {
        if (EOB)
            fillbuffer(l);
        skipWhiteSpace(l);
        skipComment(l);
    } while ((EOB || isblank((unsigned char)_BC) || _BC == '"') && l->infile);

    if (_BC == '\'') {
        lexString(l);
    }
    else _MATCH('[', NewBlock)
    else _MATCH(']', EndBlock)
    else if (_BC == ':') {
        /* ":=" is assignment, a lone ':' is a colon. */
        if (l->buf[l->bufp + 1] == '=') {
            l->bufp += 2;
            l->sym = Assign;
            l->symc = 0;
            sprintf(l->text, ":=");
        } else {
            l->bufp++;
            l->sym = Colon;
            l->symc = ':';
            sprintf(l->text, ":");
        }
    }
    else _MATCH('(', NewTerm)
    else _MATCH(')', EndTerm)
    else _MATCH('#', Pound)
    else _MATCH('^', Exit)
    else _MATCH('.', Period)
    else if (_BC == '-') {
        /* A run of '-' starting with SEPARATOR is a separator; any other
         * '-' is handed to the operator scanner. */
        if (!strncmp(l->buf + l->bufp, SEPARATOR, strlen(SEPARATOR))) {
            char* t = l->text;
            while (_BC == '-')
                *t++ = l->buf[l->bufp++];
            *t = 0;
            l->sym = Separator;
        } else {
            lexOperator(l);
        }
    }
    else if (_ISOP(_BC)) {
        lexOperator(l);
    }
    else if (!strncmp(l->buf + l->bufp, PRIMITIVE, strlen(PRIMITIVE))) {
        /* Checked before the identifier branch so that PRIMITIVE is not
         * scanned as an ordinary identifier. */
        l->bufp += strlen(PRIMITIVE);
        l->sym = Primitive;
        l->symc = 0;
        sprintf(l->text, PRIMITIVE);
    }
    else if (isalpha((unsigned char)_BC)) {
        char* t = l->text;
        l->symc = 0;

        /* Identifier: letters, digits and '_'. */
        while (isalpha((unsigned char)_BC) || isdigit((unsigned char)_BC) || _BC == '_')
            *t++ = l->buf[l->bufp++];
        l->sym = Identifier;

        /* A trailing ':' makes it a keyword; further letters/colons directly
         * after it make it a keyword sequence. */
        if (l->buf[l->bufp] == ':') {
            l->sym = Keyword;
            l->bufp++;
            *t++ = ':';
            if (isalpha((unsigned char)_BC)) {
                l->sym = KeywordSequence;
                while (isalpha((unsigned char)_BC) || _BC == ':')
                    *t++ = l->buf[l->bufp++];
            }
        }
        *t = 0;
    }
    else if (isdigit((unsigned char)_BC)) {
        lexNumber(l);
    }
    else {
        /* Unrecognized character: report it without consuming it. */
        l->sym = NONE;
        l->symc = _BC;
        sprintf(l->text, "%c", _BC);
    }

    return l->sym;
}
yylex() { char c; for (;;) { c = input(); oldnewline = newline; newline = FALSE; switch (char_type[c]) { Case C_SLASH: if (oldnewline) { parseRawline(); return(Rawline); } c = input(); if (c == '*') comment(); else { unput(c); return(DIV); } Case C_ZERO: c = input(); if (c == 'x' || c == 'X') parseNumber(16, input()); else if (c == 'b' || c == 'B') parseNumber(2, input()); else if (c == 'q' || c == 'Q') parseNumber(4, input()); else parseNumber(8, c); return(Number); Case C_DIG: parseNumber(10, c); return(Number); Case C_ALPH: parseName(c); if ((yylval = matchKeyword(yytext)) != 0) return(yylval); yylval = lookupSymbol(yytext); /* if (debug) printf("lexer: Name '%s'\n", yytext);*/ return(Name); Case C_QUOTE: lexString('"'); string(); return(String); Case C_APOSTROPHE: lexString('\''); c = input(); if (c == 'b' || c == 'B') { bitString(); return(BitString); } else { string(); unput(c); return(String); } Case C_LIT: return(litCode[c]); Case C_NL: newline = TRUE; } } }
// Scans the next token from [m_ptr, m_end) into `token` and returns its type.
//
// Leading ASCII whitespace is skipped. At end of input the token is TokEnd
// with start == end == m_ptr. For punctuation, token.start/token.end delimit
// the single character and m_ptr is advanced past it. Strings and numbers are
// delegated to lexString()/lexNumber(). Anything else leaves token.type as
// TokError and returns TokError.
//
// The returned value always matches token.type for the tokens produced here;
// in particular '(' and ')' return TokLParen/TokRParen (they previously
// returned the bracket token types while storing the paren types in `token`).
LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
{
    while (m_ptr < m_end && isASCIISpace(*m_ptr))
        ++m_ptr;

    ASSERT(m_ptr <= m_end);
    if (m_ptr >= m_end) {
        token.type = TokEnd;
        token.start = token.end = m_ptr;
        return TokEnd;
    }

    // Assume failure until a case below recognizes the character.
    token.type = TokError;
    token.start = m_ptr;

    switch (*m_ptr) {
        case '[':
            token.type = TokLBracket;
            token.end = ++m_ptr;
            return TokLBracket;
        case ']':
            token.type = TokRBracket;
            token.end = ++m_ptr;
            return TokRBracket;
        case '(':
            token.type = TokLParen;
            token.end = ++m_ptr;
            return TokLParen;
        case ')':
            token.type = TokRParen;
            token.end = ++m_ptr;
            return TokRParen;
        case '{':
            token.type = TokLBrace;
            token.end = ++m_ptr;
            return TokLBrace;
        case '}':
            token.type = TokRBrace;
            token.end = ++m_ptr;
            return TokRBrace;
        case ',':
            token.type = TokComma;
            token.end = ++m_ptr;
            return TokComma;
        case ':':
            token.type = TokColon;
            token.end = ++m_ptr;
            return TokColon;
        case '"':
        case '\'':
            return lexString(token);

        // Numbers are trickier so we only allow the most basic form, basically
        //  * [1-9][0-9]*(\.[0-9]*)?
        //  * \.[0-9]*
        //  * 0(\.[0-9]*)?
        case '0':
            // If a number starts with 0 it's expected to be octal. It seems
            // silly to attempt to handle this case, so we abort.
            if (m_ptr < m_end - 1 && isASCIIDigit(m_ptr[1]))
                return TokError;
            return lexNumber(token);
        case '.':
            // If a number starts with a '.' it must be followed by a digit.
            if (!(m_ptr < m_end - 1 && isASCIIDigit(m_ptr[1])))
                return TokError;
            return lexNumber(token);
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return lexNumber(token);
    }
    return TokError;
}