Ejemplo n.º 1
0
// Lexes and returns the next XPath token, starting at m_nextPos.
//
// Leading whitespace is skipped first. Returns Token(0) once m_nextPos has
// reached the end of m_data, and Token(XPATH_ERROR) when the text at the
// current position cannot be turned into a token.
inline Parser::Token Parser::nextTokenInternal()
{
    skipWS();

    // Nothing left to read.
    if (m_nextPos >= m_data.length())
        return Token(0);

    char current = peekCurHelper();
    switch (current) {
    // A leading digit always starts a number.
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        return lexNumber();

    // Either quote character opens a string literal.
    case '\"':
    case '\'':
        return lexString();

    // Single-character punctuation: the character itself is handed to
    // makeTokenAndAdvance.
    case '(': case ')': case '[': case ']':
    case '@': case ',': case '|':
        return makeTokenAndAdvance(current);

    case '+':
        return makeTokenAndAdvance(PLUS);
    case '-':
        return makeTokenAndAdvance(MINUS);
    case '=':
        return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);

    case '!':
        // A lone '!' is rejected; only "!=" is accepted.
        if (peekAheadHelper() != '=')
            return Token(XPATH_ERROR);
        return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);

    case '<':
        if (peekAheadHelper() != '=')
            return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
        return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);

    case '>':
        if (peekAheadHelper() != '=')
            return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
        return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);

    case '/':
        // "//" is a two-character token; otherwise a plain '/'.
        if (peekAheadHelper() != '/')
            return makeTokenAndAdvance('/');
        return makeTokenAndAdvance(SLASHSLASH, 2);

    case '.': {
        // "..", a number such as ".5", or a plain '.'.
        char lookahead = peekAheadHelper();
        if (lookahead == '.')
            return makeTokenAndAdvance(DOTDOT, 2);
        bool digitFollows = lookahead >= '0' && lookahead <= '9';
        if (!digitFollows)
            return makeTokenAndAdvance('.');
        return lexNumber();
    }

    case '*':
        // '*' is a name test unless we are in a binary-operator context,
        // in which case it is the multiplication operator.
        if (!isBinaryOperatorContext()) {
            m_nextPos++;
            return Token(NAMETEST, "*");
        }
        return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);

    case '$': { // '$' followed by a QName
        ++m_nextPos;
        String variableName;
        if (lexQName(variableName))
            return Token(VARIABLEREFERENCE, variableName);
        return Token(XPATH_ERROR);
    }
    }

    // Everything else has to begin with an NCName.
    String lexedName;
    if (!lexNCName(lexedName))
        return Token(XPATH_ERROR);

    skipWS();

    // In an operator context the name may actually be an operator keyword.
    // TODO: these comparisons could become a hash lookup.
    if (isBinaryOperatorContext()) {
        if (lexedName == "and")
            return Token(AND);
        if (lexedName == "or")
            return Token(OR);
        if (lexedName == "mod")
            return Token(MULOP, NumericOp::OP_Mod);
        if (lexedName == "div")
            return Token(MULOP, NumericOp::OP_Div);
    }

    // A following ':' means an axis ("name::"), a wildcard name test
    // ("prefix:*") or a qualified name ("prefix:local").
    if (peekCurHelper() == ':') {
        ++m_nextPos;

        if (peekCurHelper() == ':') {
            ++m_nextPos;

            // "::" is only valid after an axis name; anything else is an error.
            Step::Axis parsedAxis;
            if (!parseAxisName(lexedName, parsedAxis))
                return Token(XPATH_ERROR);
            return Token(AXISNAME, parsedAxis);
        }

        skipWS();
        if (peekCurHelper() == '*') {
            ++m_nextPos;
            return Token(NAMETEST, lexedName + ":*");
        }

        // Otherwise read the local part and assemble the full qualified name.
        String localPart;
        if (!lexNCName(localPart))
            return Token(XPATH_ERROR);

        lexedName = lexedName + ":" + localPart;
    }

    skipWS();

    // Without a following '(' the name can only be a name test.
    if (peekCurHelper() != '(')
        return Token(NAMETEST, lexedName);

    // Note: the '(' itself is deliberately left unconsumed here.
    // The name is either a node type or a function name.
    if (lexedName == "processing-instruction")
        return Token(PI);
    if (lexedName == "node")
        return Token(NODE);
    if (lexedName == "text")
        return Token(TEXT);
    if (lexedName == "comment")
        return Token(COMMENT);

    return Token(FUNCTIONNAME, lexedName);
}
Ejemplo n.º 2
0
Archivo: Lexer.c Proyecto: SOM-st/CSOM
/*
 * Scan the next symbol from the lexer's input and return it.
 *
 * On return l->sym holds the symbol kind (also the return value). Most
 * branches below also fill l->text with the symbol's text and set l->symc;
 * branches that delegate (lexString, lexOperator, lexNumber, _MATCH) are
 * defined elsewhere, so what they store is not visible here.
 *
 * EOB, _BC, _MATCH, _ISOP, SEPARATOR and PRIMITIVE are macros defined outside
 * this block. NOTE(review): _BC appears to be the current buffer character
 * (l->buf[l->bufp]) and EOB an end-of-buffer test -- confirm against the
 * macro definitions.
 */
Symbol Lexer_get_sym(Lexer* l) {
  /* A symbol was already scanned by a previous peek: hand back the saved
   * nextSym/nextSymc/nextText instead of scanning again. */
  if (l->peekDone) {
    l->peekDone = false;
    l->sym = l->nextSym;
    l->symc = l->nextSymc;
    strcpy(l->text, l->nextText);
    return l->sym;
  }

  /* Refill the buffer when needed and skip whitespace and comments, repeating
   * while the position is still at end of buffer, a blank, or a '"'
   * (presumably the comment delimiter -- confirm) and l->infile is set. */
  do {
    if (EOB)
      fillbuffer(l);
    skipWhiteSpace(l);
    skipComment(l);
  } while((EOB || isblank(_BC) || _BC == '"') && l->infile);

  /* Dispatch on the current character. */
  if(_BC == '\'') {
    lexString(l);
  }
  else _MATCH('[', NewBlock)
  else _MATCH(']', EndBlock)
  else if(_BC == ':') {
    /* ":=" is Assign (two characters consumed); a lone ':' is Colon. */
    if(l->buf[l->bufp+1] == '=') {
      l->bufp += 2;
      l->sym = Assign;
      l->symc = 0;
      sprintf(l->text, ":=");
    } else {
      l->bufp++;
      l->sym = Colon;
      l->symc = ':';
      sprintf(l->text, ":");
    }
  }
  else _MATCH('(', NewTerm)
  else _MATCH(')', EndTerm)
  else _MATCH('#', Pound)
  else _MATCH('^', Exit)
  else _MATCH('.', Period)
  else if(_BC == '-') {
    /* Input starting with SEPARATOR becomes a Separator symbol that absorbs
     * the whole run of '-' characters; any other '-' is lexed as an operator.
     * Note that l->symc is not assigned in the Separator branch. */
    if(!strncmp(l->buf + l->bufp, SEPARATOR, strlen(SEPARATOR))) {
      char* t = l->text;
      while(_BC == '-')
        *t++ = l->buf[l->bufp++];
      *t = 0;
      l->sym = Separator;
    } else {
      lexOperator(l);
    }
  }
  else if(_ISOP(_BC)) {
    lexOperator(l);
  }
  /* The PRIMITIVE check comes before the generic identifier branch, so input
   * starting with PRIMITIVE is never lexed as an Identifier. */
  else if(!strncmp(l->buf + l->bufp, PRIMITIVE, strlen(PRIMITIVE))) {
    l->bufp += strlen(PRIMITIVE);
    l->sym = Primitive;
    l->symc = 0;
    sprintf(l->text, PRIMITIVE);
  }
  else if(isalpha(_BC)) {
    /* Identifier: a letter followed by letters, digits or '_'. */
    char* t = l->text;
    l->symc = 0;
    while(isalpha(_BC) || isdigit(_BC) || _BC == '_')
      *t++ = l->buf[l->bufp++];
    l->sym = Identifier;
    /* A directly following ':' turns it into a Keyword (the ':' is kept in
     * the text). If a letter follows that ':', the symbol becomes a
     * KeywordSequence and absorbs all further letters and ':' characters. */
    if(l->buf[l->bufp] == ':') {
      l->sym = Keyword;
      l->bufp++;
      *t++ = ':';
      if(isalpha(_BC)) {
        l->sym = KeywordSequence;
        while(isalpha(_BC) || _BC == ':')
          *t++ = l->buf[l->bufp++];
      }
    }
    *t = 0;
  }
  else if(isdigit(_BC)) {
    lexNumber(l);
  }
  else {
    /* Unrecognised character: report NONE with the character stored in symc
     * and text. The buffer position is not advanced in this branch. */
    l->sym = NONE;
    l->symc = _BC;
    sprintf(l->text, "%c", _BC);
  }
  return l->sym;
}
Ejemplo n.º 3
0
/*
 * Lexer entry point: reads characters with input() until a complete token has
 * been recognised, then returns that token's code. Declared K&R style, so the
 * return type is implicitly int.
 *
 * Each character is classified through the char_type[] table and dispatched on
 * its class. The globals newline/oldnewline record whether the previous
 * character handled was a newline.
 *
 * NOTE(review): `Case` is a project macro not visible here; the code reads as
 * if it expands to `break; case`, so that each arm ends without falling
 * through into the next -- confirm against its definition.
 */
yylex()
{
	char	c;

	for (;;) {
		c = input();
		/* Remember whether the previous character was a newline, then
		   clear the flag; only the C_NL arm sets it again. */
		oldnewline = newline;
		newline = FALSE;
		switch (char_type[c]) {
			/* A slash right after a newline starts a raw line. Otherwise
			   "slash star" starts a comment, and any other follower is
			   pushed back and the slash is returned as DIV. */
			Case C_SLASH:
				if (oldnewline) {
					parseRawline();
					return(Rawline);
				}
				c = input();
				if (c == '*')
					comment();
				else {
					unput(c);
					return(DIV);
				}

			/* Leading zero: the next character selects the radix
			   (x/X = 16, b/B = 2, q/Q = 4); anything else is passed on
			   as the first character of a base-8 number. */
			Case C_ZERO:
				c = input();
				if (c == 'x' || c == 'X')
					parseNumber(16, input());
				else if (c == 'b' || c == 'B')
					parseNumber(2, input());
				else if (c == 'q' || c == 'Q')
					parseNumber(4, input());
				else
					parseNumber(8, c);
				return(Number);

			/* Any other digit starts a base-10 number. */
			Case C_DIG:
				parseNumber(10, c);
				return(Number);

			/* Names: a keyword match returns the keyword's own code
			   (also left in yylval); otherwise yylval receives the
			   result of lookupSymbol() and Name is returned. */
			Case C_ALPH:
				parseName(c);
				if ((yylval = matchKeyword(yytext)) != 0)
					return(yylval);
				yylval = lookupSymbol(yytext);
/*				  if (debug)
				     printf("lexer: Name '%s'\n", yytext);*/
				 return(Name);

			/* Double-quoted string. */
			Case C_QUOTE:
				lexString('"');
				string();
				return(String);

			/* Single-quoted text: a 'b' or 'B' directly after it makes
			   it a bit string; otherwise it is an ordinary string and
			   the extra character read is pushed back. */
			Case C_APOSTROPHE:
				lexString('\'');
				c = input();
				if (c == 'b' || c == 'B') {
					bitString();
					return(BitString);
				} else {
					string();
					unput(c);
					return(String);
				}

			/* Literal characters map straight to a token code through
			   the litCode[] table. */
			Case C_LIT:
				return(litCode[c]);

			/* Newline: no token is returned; the flag is set and the
			   loop goes on to the next character. */
			Case C_NL:
				newline = TRUE;
		}
	}
}
Ejemplo n.º 4
0
// Scans the next token from [m_ptr, m_end) into `token` and returns its type.
//
// Leading ASCII whitespace is skipped first. At end of input, `token` becomes
// an empty TokEnd token positioned at m_ptr. For single-character punctuation
// the token's type and end are set, m_ptr is advanced past the character, and
// the same type that was stored in token.type is returned. Strings and numbers
// are delegated to lexString() and lexNumber().
//
// For any input rejected here, TokError is returned with token.type set to
// TokError and token.start pointing at the offending character; token.end is
// not written and m_ptr is not advanced.
LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
{
    while (m_ptr < m_end && isASCIISpace(*m_ptr))
        ++m_ptr;

    ASSERT(m_ptr <= m_end);
    if (m_ptr >= m_end) {
        token.type = TokEnd;
        token.start = token.end = m_ptr;
        return TokEnd;
    }

    // Assume failure until a case below recognises the character.
    token.type = TokError;
    token.start = m_ptr;
    switch (*m_ptr) {
        case '[':
            token.type = TokLBracket;
            token.end = ++m_ptr;
            return TokLBracket;
        case ']':
            token.type = TokRBracket;
            token.end = ++m_ptr;
            return TokRBracket;
        case '(':
            token.type = TokLParen;
            token.end = ++m_ptr;
            // Return the paren type so the result agrees with token.type
            // (this case previously returned TokLBracket).
            return TokLParen;
        case ')':
            token.type = TokRParen;
            token.end = ++m_ptr;
            // Return the paren type so the result agrees with token.type
            // (this case previously returned TokRBracket).
            return TokRParen;
        case '{':
            token.type = TokLBrace;
            token.end = ++m_ptr;
            return TokLBrace;
        case '}':
            token.type = TokRBrace;
            token.end = ++m_ptr;
            return TokRBrace;
        case ',':
            token.type = TokComma;
            token.end = ++m_ptr;
            return TokComma;
        case ':':
            token.type = TokColon;
            token.end = ++m_ptr;
            return TokColon;
        case '"':
        case '\'':
            return lexString(token);

        // Numbers are trickier so we only allow the most basic form, basically
        // * [1-9][0-9]*(\.[0-9]*)?
        // * \.[0-9]*
        // * 0(\.[0-9]*)?
        case '0':
            // If a number starts with 0 it's expected to be octal.  It seems silly
            // to attempt to handle this case, so we abort
            if (m_ptr < m_end - 1 && isASCIIDigit(m_ptr[1]))
                return TokError;
            return lexNumber(token);
        case '.':
            // If a number starts with a '.' it must be followed by a digit
            if (!(m_ptr < m_end - 1 && isASCIIDigit(m_ptr[1])))
                return TokError;
            return lexNumber(token);
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return lexNumber(token);
    }

    // Any other character cannot start a token.
    return TokError;
}