Пример #1
0
 Token *Lexer::nextToken() {
     while(true) {
         switch(_currentChar.toAscii()) {
         case '\'': return scanCharacter();
         case '"': return scanText();
         case '(': return scan(Token::LeftParenthesis);
         case ')': return scan(Token::RightParenthesis);
         case '[': return scan(Token::LeftBracket);
         case ']': return scanRightBracket();
         case '{': return scan(Token::LeftBrace);
         case '}': return scan(Token::RightBrace);
         case ';': return scan(Token::Semicolon);
         default:
             if(isEof()) return scan(Token::Eof);
             else if(isLineComment()) consumeLineComment();
             else if(isBlockComment()) consumeBlockComment();
             else if(isNewline()) return scanNewline();
             else if(isSpace()) consumeSpaces();
             else if(isName()) return scanName();
             else if(isBackquotedName()) return scanBackquotedName();
             else if(isNumber()) return scanNumber();
             else if(isOperator()) return scanOperator();
             else throw lexerException(QString("invalid character: '%1'").arg(_currentChar));
         }
     }
 }
Пример #2
0
void Calculator::operButtonPressed(QString operation)
{
    currOperand = scanNumber(displayString);
    clearDisplayString();
    ui->nDotButton->setEnabled(true);

    if (currOper != "")
    {
        SimpleCalculator* simpleCalc = new SimpleCalculator();
        QObject::connect(simpleCalc, SIGNAL(resultChanged(double)),
                         this, SLOT(setResult(double)));
        simpleCalc->changeFirstOperand(result);
        simpleCalc->changeSecondOperand(currOperand);
        simpleCalc->changeOperation(currOper);
        simpleCalc->getResult();
        currOper = operation;
        delete simpleCalc;

    }
Пример #3
0
static void
testRegexpFile(const char *filename) {
    FILE *input;
    char expr[5000];
    int len;
    int ret;
    int i;
    xmlAutomataPtr am;
    xmlAutomataStatePtr states[1000];
    xmlRegexpPtr regexp = NULL;
    xmlRegExecCtxtPtr exec = NULL;

    for (i = 0;i<1000;i++)
	states[i] = NULL;

    input = fopen(filename, "r");
    if (input == NULL) {
        xmlGenericError(xmlGenericErrorContext,
		"Cannot open %s for reading\n", filename);
	return;
    }

    am = xmlNewAutomata();
    if (am == NULL) {
        xmlGenericError(xmlGenericErrorContext,
		"Cannot create automata\n");
	fclose(input);
	return;
    }
    states[0] = xmlAutomataGetInitState(am);
    if (states[0] == NULL) {
        xmlGenericError(xmlGenericErrorContext,
		"Cannot get start state\n");
	xmlFreeAutomata(am);
	fclose(input);
	return;
    }
    ret = 0;

    while (fgets(expr, 4500, input) != NULL) {
	if (expr[0] == '#')
	    continue;
	len = strlen(expr);
	len--;
	while ((len >= 0) && 
	       ((expr[len] == '\n') || (expr[len] == '\t') ||
		(expr[len] == '\r') || (expr[len] == ' '))) len--;
	expr[len + 1] = 0;      
	if (len >= 0) {
	    if ((am != NULL) && (expr[0] == 't') && (expr[1] == ' ')) {
		char *ptr = &expr[2];
		int from, to;

		from = scanNumber(&ptr);
		if (*ptr != ' ') {
		    xmlGenericError(xmlGenericErrorContext,
			    "Bad line %s\n", expr);
		    break;
		}
		if (states[from] == NULL)
		    states[from] = xmlAutomataNewState(am);
		ptr++;
		to = scanNumber(&ptr);
		if (*ptr != ' ') {
		    xmlGenericError(xmlGenericErrorContext,
			    "Bad line %s\n", expr);
		    break;
		}
		if (states[to] == NULL)
		    states[to] = xmlAutomataNewState(am);
		ptr++;
		xmlAutomataNewTransition(am, states[from], states[to],
			                 BAD_CAST ptr, NULL);
	    } else if ((am != NULL) && (expr[0] == 'e') && (expr[1] == ' ')) {
		char *ptr = &expr[2];
		int from, to;

		from = scanNumber(&ptr);
		if (*ptr != ' ') {
		    xmlGenericError(xmlGenericErrorContext,
			    "Bad line %s\n", expr);
		    break;
		}
		if (states[from] == NULL)
		    states[from] = xmlAutomataNewState(am);
		ptr++;
		to = scanNumber(&ptr);
		if (states[to] == NULL)
		    states[to] = xmlAutomataNewState(am);
		xmlAutomataNewEpsilon(am, states[from], states[to]);
	    } else if ((am != NULL) && (expr[0] == 'f') && (expr[1] == ' ')) {
		char *ptr = &expr[2];
		int state;

		state = scanNumber(&ptr);
		if (states[state] == NULL) {
		    xmlGenericError(xmlGenericErrorContext,
			    "Bad state %d : %s\n", state, expr);
		    break;
		}
		xmlAutomataSetFinalState(am, states[state]);
	    } else if ((am != NULL) && (expr[0] == 'c') && (expr[1] == ' ')) {
		char *ptr = &expr[2];
		int from, to;
		int min, max;

		from = scanNumber(&ptr);
		if (*ptr != ' ') {
		    xmlGenericError(xmlGenericErrorContext,
			    "Bad line %s\n", expr);
		    break;
		}
		if (states[from] == NULL)
		    states[from] = xmlAutomataNewState(am);
		ptr++;
		to = scanNumber(&ptr);
		if (*ptr != ' ') {
		    xmlGenericError(xmlGenericErrorContext,
			    "Bad line %s\n", expr);
		    break;
		}
		if (states[to] == NULL)
		    states[to] = xmlAutomataNewState(am);
		ptr++;
		min = scanNumber(&ptr);
		if (*ptr != ' ') {
		    xmlGenericError(xmlGenericErrorContext,
			    "Bad line %s\n", expr);
		    break;
		}
		ptr++;
		max = scanNumber(&ptr);
		if (*ptr != ' ') {
		    xmlGenericError(xmlGenericErrorContext,
			    "Bad line %s\n", expr);
		    break;
		}
		ptr++;
		xmlAutomataNewCountTrans(am, states[from], states[to],
			                 BAD_CAST ptr, min, max, NULL);
	    } else if ((am != NULL) && (expr[0] == '-') && (expr[1] == '-')) {
		
		regexp = xmlAutomataCompile(am);
		xmlFreeAutomata(am);
		am = NULL;
		if (regexp == NULL) {
		    xmlGenericError(xmlGenericErrorContext,
			    "Failed to compile the automata");
		    break;
		}
	    } else if ((expr[0] == '=') && (expr[1] == '>')) {
		if (regexp == NULL) {
		    printf("=> failed not compiled\n");
		} else {
		    if (exec == NULL)
			exec = xmlRegNewExecCtxt(regexp, NULL, NULL);
		    if (ret == 0) {
			ret = xmlRegExecPushString(exec, NULL, NULL);
		    }
		    if (ret == 1)
			printf("=> Passed\n");
		    else if ((ret == 0) || (ret == -1))
			printf("=> Failed\n");
		    else if (ret < 0)
			printf("=> Error\n");
		    xmlRegFreeExecCtxt(exec);
		    exec = NULL;
		}
		ret = 0;
	    } else if (regexp != NULL) {
		if (exec == NULL)
		    exec = xmlRegNewExecCtxt(regexp, NULL, NULL);
		ret = xmlRegExecPushString(exec, BAD_CAST expr, NULL);
	    } else {
		xmlGenericError(xmlGenericErrorContext,
			"Unexpected line %s\n", expr);
	    }
	}
    }
    fclose(input);
    if (regexp != NULL)
	xmlRegFreeRegexp(regexp);
    if (exec != NULL)
	xmlRegFreeExecCtxt(exec);
    if (am != NULL)
	xmlFreeAutomata(am);
}
Пример #4
0
std::deque<Token*> PreprocessorLexer::scanDirective(const std::string& directive) const {
    std::deque<Token*> tokenList;

    size_t stringStart = std::string::npos;
    for (size_t pos = 0; pos < directive.length(); ++pos) {
        const char top = directive[pos];
        const char next = ((pos < (directive.length() - 1)) ? directive[pos + 1] : '\0');

        if (stringStart != std::string::npos) {
            switch (top) {
                case '\\':
                    if (next == '"')
                        ++pos;
                    break;

                case '"':
                    tokenList.push_back( new StringToken(PreprocessorTerminals::ID_STRING,
                            directive.substr(stringStart, (pos - stringStart))) );
                    tokenList.push_back(new Token(PreprocessorTerminals::ID_QUOTE));
                    stringStart = std::string::npos;
                    break;

                default:
                    break;
            } // switch (top)

            continue;
        }   // if (readingString

        // omit any kind of whitespace within the directive
        //
        if (isWhitespace(top) == true)
            continue;

        bool consumed = true;

        // at first determine characters which correspond to preprocessor operators
        //
        switch (top) {
            case '#':
                if (next == '#') {
                    tokenList.push_back(new Token(PreprocessorTerminals::ID_OP_FFENCE));
                    ++pos;
                } else if ((next == 0) || (isWhitespace(next)))
                    tokenList.push_back(new Token(PreprocessorTerminals::ID_FENCE));
                else
                    consumed = false;
                break;

            case '(':
                tokenList.push_back(new Token(PreprocessorTerminals::ID_LPAREN));
                break;

            case ')':
                tokenList.push_back(new Token(PreprocessorTerminals::ID_RPAREN));
                break;

            case ',':
                tokenList.push_back(new Token(PreprocessorTerminals::ID_COMMA));
                break;

            case '+':
                tokenList.push_back(new Token(PreprocessorTerminals::ID_PLUS));
                break;

            case '-':
                tokenList.push_back(new Token(PreprocessorTerminals::ID_DASH));
                break;

            case '~':
                tokenList.push_back(new Token(PreprocessorTerminals::ID_OP_COMPLEMENT));
                break;

            case '!':
                if (next == '=') {
                    tokenList.push_back(new Token(PreprocessorTerminals::ID_NEQ));
                    ++pos;
                } else
                    tokenList.push_back(new Token(PreprocessorTerminals::ID_OP_NOT));
                break;

            case '*':
                tokenList.push_back(new Token(PreprocessorTerminals::ID_OP_MUL));
                break;

            case '/':
                tokenList.push_back(new Token(PreprocessorTerminals::ID_OP_DIV));
                break;

            case '%':
                tokenList.push_back(new Token(PreprocessorTerminals::ID_OP_MOD));
                break;

            case '<':
                if (next == '<') {
                    tokenList.push_back(new Token(PreprocessorTerminals::ID_OP_LSHIFT));
                    ++pos;
                } else if (next == '=') {
                    tokenList.push_back(new Token(PreprocessorTerminals::ID_LEQ));
                    ++pos;
                } else
                    tokenList.push_back(new Token(PreprocessorTerminals::ID_LESS));
                break;

            case '>':
                if (next == '>') {
                    tokenList.push_back(new Token(PreprocessorTerminals::ID_OP_RSHIFT));
                    ++pos;
                } else if (next == '=') {
                    tokenList.push_back(new Token(PreprocessorTerminals::ID_GEQ));
                    ++pos;
                } else
                    tokenList.push_back(new Token(PreprocessorTerminals::ID_GREATER));
                break;

            case '=':
                if (next == '=') {
                    tokenList.push_back(new Token(PreprocessorTerminals::ID_EQ));
                    ++pos;
                } else
                    consumed = false;
                break;

            case '&':
                if (next == '&') {
                    tokenList.push_back(new Token(PreprocessorTerminals::ID_LOGICAL_AND));
                    ++pos;
                } else
                    tokenList.push_back(new Token(PreprocessorTerminals::ID_BIT_AND));
                break;

            case '|':
                if (next == '|') {
                    tokenList.push_back(new Token(PreprocessorTerminals::ID_LOGICAL_OR));
                    ++pos;
                } else
                    tokenList.push_back(new Token(PreprocessorTerminals::ID_BIT_OR));
                break;

            case '^':
                tokenList.push_back(new Token(PreprocessorTerminals::ID_BIT_XOR));
                break;

            case '"':
                stringStart = pos + 1;  // indicates the start of a string which will be read.
                tokenList.push_back(new Token(PreprocessorTerminals::ID_QUOTE));
                break;

            default:
                consumed = false;
                break;
        }   // switch (top)

        if (consumed == true)
            continue;

        if ((isAlpha(top) == true) || (top == '#')) {
            std::string identifier(&top, 1);
            for (++pos; pos < directive.length(); ++pos) {
                const char top = directive[pos];
                if ((isAlpha(top) == true) || (isDigit(top) == true))
                    identifier += top;
                else {
                    --pos;
                    break;
                }
            }

            KeywordMap::const_iterator it = keywords_.find(identifier);
            if (it != keywords_.end())
                tokenList.push_back(new Word(it->second));
            else
                tokenList.push_back(new IdentifierToken(PreprocessorTerminals::ID_IDENTIFIER, identifier));
        } else if ((isDigit(top) == true) || ((top == '.') && (isDigit(next) == true))) {
            size_t end = 0;
            ConstantToken* ct = scanNumber(directive.substr(pos), end);
            if (ct != 0)
                tokenList.push_back(ct);
            pos += end;
        } else
            tokenList.push_back(new TextToken(PreprocessorTerminals::ID_TEXT, std::string(&top, 1)));
    }   // for (pos < length)

    return tokenList;
}
Пример #5
0
// ---------------------------------------------------------------------------
//  XPathScanner: Scan methods
// ---------------------------------------------------------------------------
bool XPathScanner::scanExpression(const XMLCh* const data,
                                  int currentOffset,
                                  const int endOffset,
                                  ValueVectorOf<int>* const tokens) {

    bool      starIsMultiplyOperator = false;
    int       nameOffset = -1;
    int       nameHandle = -1;
    int       prefixHandle = -1;
    XMLCh     ch;
    XMLBuffer dataBuffer(128, tokens->getMemoryManager());

    while (true) {

        if (currentOffset == endOffset) {
            break;
        }

        ch = data[currentOffset];

        while (XMLChar1_0::isWhitespace(ch)) {

            if (++currentOffset == endOffset) {
                break;
            }

            ch = data[currentOffset];
        }

        if (currentOffset == endOffset) {
            break;
        }
        //
        // [28] ExprToken ::= '(' | ')' | '[' | ']' | '.' | '..' | '@' | ',' | '::'
        //                  | NameTest | NodeType | Operator | FunctionName
        //                  | AxisName | Literal | Number | VariableReference
        //
        XMLByte chartype = (ch >= 0x80) ? CHARTYPE_NONASCII : fASCIICharMap[ch];

        switch (chartype) {
        case CHARTYPE_OPEN_PAREN:       // '('
            addToken(tokens, XercesXPath::EXPRTOKEN_OPEN_PAREN);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_CLOSE_PAREN:      // ')'
            addToken(tokens, XercesXPath::EXPRTOKEN_CLOSE_PAREN);
            starIsMultiplyOperator = true;
            ++currentOffset;
            break;
        case CHARTYPE_OPEN_BRACKET:     // '['
            addToken(tokens, XercesXPath::EXPRTOKEN_OPEN_BRACKET);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_CLOSE_BRACKET:    // ']'
            addToken(tokens, XercesXPath::EXPRTOKEN_CLOSE_BRACKET);
            starIsMultiplyOperator = true;
            ++currentOffset;
            break;
                //
                // [30] Number ::= Digits ('.' Digits?)? | '.' Digits
                //                                         ^^^^^^^^^^
                //
        case CHARTYPE_PERIOD:           // '.', '..' or '.' Digits
            if (currentOffset + 1 == endOffset) {
                addToken(tokens, XercesXPath::EXPRTOKEN_PERIOD);
                starIsMultiplyOperator = true;
                currentOffset++;
                break;
            }

            ch = data[currentOffset + 1];

            if (ch == chPeriod) {            // '..'
                addToken(tokens, XercesXPath::EXPRTOKEN_DOUBLE_PERIOD);
                starIsMultiplyOperator = true;
                currentOffset += 2;
            } else if (ch >= chDigit_0 && ch <= chDigit_9) {
                addToken(tokens, XercesXPath::EXPRTOKEN_NUMBER);
                starIsMultiplyOperator = true;
                currentOffset = scanNumber(data, endOffset, currentOffset, tokens);
            } else if (ch == chForwardSlash) {
                addToken(tokens, XercesXPath::EXPRTOKEN_PERIOD);
                starIsMultiplyOperator = true;
                currentOffset++;
            } else if (ch == chPipe) { // '|'
                addToken(tokens, XercesXPath::EXPRTOKEN_PERIOD);
                starIsMultiplyOperator = true;
                currentOffset++;
            } else if (XMLChar1_0::isWhitespace(ch)) {
                do {
                    if (++currentOffset == endOffset)
                        break;

                    ch = data[currentOffset];
                } while (XMLChar1_0::isWhitespace(ch));

                if (currentOffset == endOffset || ch == chPipe) {
				    addToken(tokens, XercesXPath::EXPRTOKEN_PERIOD);
                    starIsMultiplyOperator = true;
                    break;
                }
            } else {
                ThrowXMLwithMemMgr(XPathException, XMLExcepts::XPath_InvalidChar, tokens->getMemoryManager());
            }

            break;
        case CHARTYPE_ATSIGN:           // '@'
            addToken(tokens, XercesXPath::EXPRTOKEN_ATSIGN);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_COMMA:            // ','
            addToken(tokens, XercesXPath::EXPRTOKEN_COMMA);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_COLON:            // '::'
            if (++currentOffset == endOffset) {
                return false; // REVISIT
            }
            ch = data[currentOffset];

            if (ch != chColon) {
                return false; // REVISIT
            }
            addToken(tokens, XercesXPath::EXPRTOKEN_DOUBLE_COLON);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_SLASH:            // '/' and '//'
            if (++currentOffset == endOffset) {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_SLASH);
                starIsMultiplyOperator = false;
                break;
            }

            ch = data[currentOffset];

            if (ch == chForwardSlash) { // '//'
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_DOUBLE_SLASH);
                starIsMultiplyOperator = false;
                ++currentOffset;
            } else {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_SLASH);
                starIsMultiplyOperator = false;
            }
            break;
        case CHARTYPE_UNION:            // '|'
            addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_UNION);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_PLUS:             // '+'
            addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_PLUS);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_MINUS:            // '-'
            addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_MINUS);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_EQUAL:            // '='
            addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_EQUAL);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_EXCLAMATION:      // '!='
            if (++currentOffset == endOffset) {
                return false; // REVISIT
            }

            ch = data[currentOffset];

            if (ch != chEqual) {
                return false; // REVISIT
            }

            addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_NOT_EQUAL);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_LESS: // '<' and '<='
            if (++currentOffset == endOffset) {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_LESS);
                starIsMultiplyOperator = false;
                break;
            }

            ch = data[currentOffset];

            if (ch == chEqual) { // '<='
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_LESS_EQUAL);
                starIsMultiplyOperator = false;
                ++currentOffset;
            } else {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_LESS);
                starIsMultiplyOperator = false;
            }
            break;
        case CHARTYPE_GREATER: // '>' and '>='
            if (++currentOffset == endOffset) {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_GREATER);
                starIsMultiplyOperator = false;
                break;
            }

            ch = data[currentOffset];

            if (ch == chEqual) { // '>='
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_GREATER_EQUAL);
                starIsMultiplyOperator = false;
                ++currentOffset;
            } else {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_GREATER);
                starIsMultiplyOperator = false;
            }
            break;
        //
        // [29] Literal ::= '"' [^"]* '"' | "'" [^']* "'"
        //
        case CHARTYPE_QUOTE:            // '\"' or '\''
            {
                XMLCh qchar = ch;
                if (++currentOffset == endOffset) {
                    return false; // REVISIT
                }

                ch = data[currentOffset];

                int litOffset = currentOffset;
                while (ch != qchar) {
                    if (++currentOffset == endOffset) {
                        return false; // REVISIT
                    }

                    ch = data[currentOffset];
                }

                addToken(tokens, XercesXPath::EXPRTOKEN_LITERAL);
                starIsMultiplyOperator = true;

                dataBuffer.set(data + litOffset, currentOffset - litOffset);
                tokens->addElement(fStringPool->addOrFind(dataBuffer.getRawBuffer()));
                ++currentOffset;
                break;
            }
        //
        // [30] Number ::= Digits ('.' Digits?)? | '.' Digits
        // [31] Digits ::= [0-9]+
        //
        case CHARTYPE_DIGIT:
            addToken(tokens, XercesXPath::EXPRTOKEN_NUMBER);
            starIsMultiplyOperator = true;
            currentOffset = scanNumber(data, endOffset, currentOffset, tokens);
            break;
        //
        // [36] VariableReference ::= '$' QName
        //
        case CHARTYPE_DOLLAR:
            if (++currentOffset == endOffset) {
                return false; // REVISIT
            }
            nameOffset = currentOffset;
            currentOffset = scanNCName(data, endOffset, currentOffset);

            if (currentOffset == nameOffset) {
                return false; // REVISIT
            }

            if (currentOffset < endOffset) {
                ch = data[currentOffset];
            }
            else {
                ch = 0;
            }

            dataBuffer.set(data + nameOffset, currentOffset - nameOffset);
            nameHandle = fStringPool->addOrFind(dataBuffer.getRawBuffer());
            prefixHandle = -1;

            if (ch == chColon) {

                prefixHandle = nameHandle;
                if (++currentOffset == endOffset) {
                    return false; // REVISIT
                }
                nameOffset = currentOffset;
                currentOffset = scanNCName(data, endOffset, currentOffset);

                if (currentOffset == nameOffset) {
                    return false; // REVISIT
                }

                dataBuffer.set(data + nameOffset, currentOffset - nameOffset);
                nameHandle = fStringPool->addOrFind(dataBuffer.getRawBuffer());
            }
            addToken(tokens, XercesXPath::EXPRTOKEN_VARIABLE_REFERENCE);
            starIsMultiplyOperator = true;
            tokens->addElement(prefixHandle);
            tokens->addElement(nameHandle);
            break;
        //
        // [37] NameTest ::= '*' | NCName ':' '*' | QName
        // [34] MultiplyOperator ::= '*'
        //
        case CHARTYPE_STAR:             // '*'
            //
            // 3.7 Lexical Structure
            //
            //  If there is a preceding token and the preceding token is not one of @, ::, (, [, , or
            //  an Operator, then a * must be recognized as a MultiplyOperator.
            //
            // Otherwise, the token must not be recognized as a MultiplyOperator.
            //
            if (starIsMultiplyOperator) {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_MULT);
                starIsMultiplyOperator = false;
            } else {
                addToken(tokens, XercesXPath::EXPRTOKEN_NAMETEST_ANY);
                starIsMultiplyOperator = true;
            }

            ++currentOffset;
            break;
        //
        // NCName, QName and non-terminals
        //
        case CHARTYPE_NONASCII: // possibly a valid non-ascii 'Letter' (BaseChar | Ideographic)
        case CHARTYPE_LETTER:
        case CHARTYPE_UNDERSCORE:
            //
            // 3.7 Lexical Structure
            //
            //  If there is a preceding token and the preceding token is not one of @, ::, (, [, , or
            //  an Operator, then an NCName must be recognized as an OperatorName.
            //
            //  If the character following an NCName (possibly after intervening ExprWhitespace) is (,
            //  then the token must be recognized as a NodeType or a FunctionName.
            //
            //  If the two characters following an NCName (possibly after intervening ExprWhitespace)
            //  are ::, then the token must be recognized as an AxisName.
            //
            //  Otherwise, the token must not be recognized as an OperatorName, a NodeType, a
            //  FunctionName, or an AxisName.
            //
            // [33] OperatorName ::= 'and' | 'or' | 'mod' | 'div'
            // [38] NodeType ::= 'comment' | 'text' | 'processing-instruction' | 'node'
            // [35] FunctionName ::= QName - NodeType
            // [6] AxisName ::= (see above)
            //
            // [37] NameTest ::= '*' | NCName ':' '*' | QName
            // [5] NCName ::= (Letter | '_') (NCNameChar)*
            // [?] NCNameChar ::= Letter | Digit | '.' | '-' | '_'  (ascii subset of 'NCNameChar')
            // [?] QName ::= (NCName ':')? NCName
            // [?] Letter ::= [A-Za-z]                              (ascii subset of 'Letter')
            // [?] Digit ::= [0-9]                                  (ascii subset of 'Digit')
            //
            nameOffset = currentOffset;
            currentOffset = scanNCName(data, endOffset, currentOffset);
            if (currentOffset == nameOffset) {
                return false; // REVISIT
			}

            if (currentOffset < endOffset) {
                ch = data[currentOffset];
            }
            else {
                ch = 0;
            }

            dataBuffer.set(data + nameOffset, currentOffset - nameOffset);
            nameHandle = fStringPool->addOrFind(dataBuffer.getRawBuffer());

            bool isNameTestNCName = false;
            bool isAxisName = false;
            prefixHandle = -1;

            if (ch == chColon) {

                if (++currentOffset == endOffset) {
                    return false; // REVISIT
                }

                ch = data[currentOffset];

                if (ch == chAsterisk) {
                    if (++currentOffset < endOffset) {
                        ch = data[currentOffset];
                    }

                    isNameTestNCName = true;
                } else if (ch == chColon) {
                    if (++currentOffset < endOffset) {
                        ch = data[currentOffset];
                    }

                    isAxisName = true;
                } else {
                    prefixHandle = nameHandle;
                    nameOffset = currentOffset;
                    currentOffset = scanNCName(data, endOffset, currentOffset);
                    if (currentOffset == nameOffset) {
                        return false; // REVISIT
                    }
                    if (currentOffset < endOffset) {
                        ch = data[currentOffset];
                    }
                    else {
                        ch = 0;
                    }

                    dataBuffer.set(data + nameOffset, currentOffset - nameOffset);
                    nameHandle = fStringPool->addOrFind(dataBuffer.getRawBuffer());
                }
            }
            //
            // [39] ExprWhitespace ::= S
            //
            while (XMLChar1_0::isWhitespace(ch)) {
                if (++currentOffset == endOffset) {
                    break;
                }
                ch = data[currentOffset];
            }

            //
            //  If there is a preceding token and the preceding token is not one of @, ::, (, [, , or
            //  an Operator, then an NCName must be recognized as an OperatorName.
            //
            if (starIsMultiplyOperator) {
                if (nameHandle == fAndSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_AND);
                    starIsMultiplyOperator = false;
                } else if (nameHandle == fOrSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_OR);
                    starIsMultiplyOperator = false;
                } else if (nameHandle == fModSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_MOD);
                    starIsMultiplyOperator = false;
                } else if (nameHandle == fDivSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_DIV);
                    starIsMultiplyOperator = false;
                } else {
                    return false; // REVISIT
                }

                if (isNameTestNCName) {
                    return false; // REVISIT - NCName:* where an OperatorName is required
                } else if (isAxisName) {
                    return false; // REVISIT - AxisName:: where an OperatorName is required
                }
                break;
            }
            //
            //  If the character following an NCName (possibly after intervening ExprWhitespace) is (,
            //  then the token must be recognized as a NodeType or a FunctionName.
            //
            if (ch == chOpenParen && !isNameTestNCName && !isAxisName) {
                if (nameHandle == fCommentSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_NODETYPE_COMMENT);
                } else if (nameHandle == fTextSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_NODETYPE_TEXT);
                } else if (nameHandle == fPISymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_NODETYPE_PI);
                } else if (nameHandle == fNodeSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_NODETYPE_NODE);
                } else {
                    addToken(tokens, XercesXPath::EXPRTOKEN_FUNCTION_NAME);
                    tokens->addElement(prefixHandle);
                    tokens->addElement(nameHandle);
                }
                addToken(tokens, XercesXPath::EXPRTOKEN_OPEN_PAREN);
                starIsMultiplyOperator = false;
                ++currentOffset;
                break;
            }

            //
            //  If the two characters following an NCName (possibly after intervening ExprWhitespace)
            //  are ::, then the token must be recognized as an AxisName.
            //
            if (isAxisName ||
                (ch == chColon && currentOffset + 1 < endOffset &&
                 data[currentOffset + 1] == chColon)) {

                if (nameHandle == fAncestorSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_ANCESTOR);
                } else if (nameHandle == fAncestorOrSelfSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_ANCESTOR_OR_SELF);
                } else if (nameHandle == fAttributeSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_ATTRIBUTE);
                } else if (nameHandle == fChildSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_CHILD);
                } else if (nameHandle == fDescendantSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_DESCENDANT);
                } else if (nameHandle == fDescendantOrSelfSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_DESCENDANT_OR_SELF);
                } else if (nameHandle == fFollowingSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_FOLLOWING);
                } else if (nameHandle == fFollowingSiblingSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_FOLLOWING_SIBLING);
                } else if (nameHandle == fNamespaceSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_NAMESPACE);
                } else if (nameHandle == fParentSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_PARENT);
                } else if (nameHandle == fPrecedingSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_PRECEDING);
                } else if (nameHandle == fPrecedingSiblingSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_PRECEDING_SIBLING);
                } else if (nameHandle == fSelfSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_SELF);
                } else {
                    return false; // REVISIT
                }

                if (isNameTestNCName) {
                    return false; // REVISIT - "NCName:* ::" where "AxisName ::" is required
                }

                addToken(tokens, XercesXPath::EXPRTOKEN_DOUBLE_COLON);
                starIsMultiplyOperator = false;
                if (!isAxisName) {
                    currentOffset += 2;
                }
                break;
            }
            //
            //  Otherwise, the token must not be recognized as an OperatorName, a NodeType, a
            //  FunctionName, or an AxisName.
            //
            if (isNameTestNCName) {
                addToken(tokens, XercesXPath::EXPRTOKEN_NAMETEST_NAMESPACE);
                tokens->addElement(nameHandle);
            } else {
                addToken(tokens, XercesXPath::EXPRTOKEN_NAMETEST_QNAME);
                tokens->addElement(prefixHandle);
                tokens->addElement(nameHandle);
            }

            starIsMultiplyOperator = true;
            break;
        }
    }

    return true;
}
Пример #6
0
std::string TokenScanner::nextToken() {
    if (savedTokens) {
        StringCell* cp = savedTokens;
        std::string token = cp->str;
        savedTokens = cp->link;
        delete cp;
        return token;
    }

    while (true) {
        if (ignoreWhitespaceFlag) {
            skipSpaces();
        }
        int ch = isp->get();
        if (ch == '/' && ignoreCommentsFlag) {
            ch = isp->get();
            if (ch == '/') {
                while (true) {
                    ch = isp->get();
                    if (ch == '\n' || ch == '\r' || ch == EOF) {
                        break;
                    }
                }
                continue;
            } else if (ch == '*') {
                int prev = EOF;
                while (true) {
                    ch = isp->get();
                    if (ch == EOF || (prev == '*' && ch == '/')) {
                        break;
                    }
                    prev = ch;
                }
                continue;
            }
            if (ch != EOF) {
                isp->unget();
            }
            ch = '/';
        }
        if (ch == EOF) {
            return "";
        }
        if ((ch == '"' || ch == '\'') && scanStringsFlag) {
            isp->unget();
            return scanString();
        }
        if (isdigit(ch) && scanNumbersFlag) {
            isp->unget();
            return scanNumber();
        }
        if (isWordCharacter(ch)) {
            isp->unget();
            return scanWord();
        }
        std::string op = std::string(1, ch);
        while (isOperatorPrefix(op)) {
            ch = isp->get();
            if (ch == EOF) {
                break;
            }
            op += ch;
        }
        while (op.length() > 1 && !isOperator(op)) {
            isp->unget();
            op.erase(op.length() - 1, 1);
        }
        return op;
    }
}
Пример #7
0
// BRACE_OPEN [0-9]+ ( ':' ([0-9]* '.' [0-9]* )? ('x' | 'X' | 'o')? '!'? )? BRACE_CLOSE
bool Format::processFormatSpecifier(String& str, size_t& offset, size_t& argnum, FormatFlags& flags)
{
	bool retval = true;

	if( str[offset] == BRACE_OPEN )
	{
		++offset;

		// Get the argument number.
		int arg;
		int argsign;
		bool unused;
		if( scanNumber(str, offset, arg, argsign, unused) )
		{
			// Don't allow signs on the number.
			if( argsign != 0 )
			{
				retval = false;
			}

			argnum = arg;
		}
		else
		{
			retval = false;
		}

		// Get the format width/output specifiers.
		if( str[offset] == ':' )
		{
			++offset;

			int val;
			int sign;
			bool leadingZero;
			if( scanNumber(str, offset, val, sign, leadingZero) )
			{
				if( sign == -1 )
				{
					val *= -1;
					flags.flags |= FormatFlags::ALIGN_LEFT;
				}
				else if( sign == 1 )
				{
					flags.flags |= FormatFlags::SHOW_PLUS;
				}

				if( leadingZero && sign != -1)
				{
					flags.fill = '0';
				}
				flags.width = val;
			}

			if( (offset < str.length()) && (str[offset] == '.') )
			{
				size_t j = offset + 1;
				if( scanNumber(str, j, val, sign, leadingZero) )
				{
					flags.precision = val;
					offset = j;
				}
			}

			if( offset < str.length() )
			{
				if( str[offset] == 'x' )
				{
					flags.flags |= FormatFlags::HEX;
					++offset;
				}
				else if( str[offset] == 'X' )
				{
					flags.flags |= FormatFlags::HEX | FormatFlags::UPPERCASE;
					++offset;
				}
				else if( str[offset] == 'o' )
				{
					flags.flags |= FormatFlags::OCT;
					++offset;
				}

				if( offset < str.length() && str[offset] == '!' )
				{
					flags.flags |= FormatFlags::SHOW_BASE;
					++offset;
				}
			}
		}

		if( (offset < str.length()) && (str[offset] == BRACE_CLOSE) )
		{
			++offset;
		}
		else
		{
			// No close brace.
			retval = false;
		}
	}

	return retval;
}
Пример #8
0
Token* AnnotationLexer::nextToken() {
    Token* token = 0;
    int tokenLine = getCurrentLineNumber();
    int tokenStart = getCurrentCharNumberInLine();

    try {
        do {
            if (readingString_) {
                readChar();
                if (peek_ == '"') {
                    readingString_ = false;
                    return new Token(AnnotationTerminals::ID_QUOTE);
                } else {
                    std::string str;
                    do {
                        str += peek_;
                        if (nextChar() != '"')
                            readChar();
                        else
                            break;
                    } while (true);
                    return new StringToken(AnnotationTerminals::ID_STRING, str);
                }
            }

            // Skip whitespace including newlines
            //
            do {
                readChar();
            } while (isWhitespace(peek_));

            switch (peek_) {
                case '@':
                    token = new Token(AnnotationTerminals::ID_AT);
                    break;

                case '=':
                    token = new Token(AnnotationTerminals::ID_EQUAL);
                    break;

                case '[':
                    token = new Token(AnnotationTerminals::ID_LBRACKET);
                    break;

                case ']':
                    token = new Token(AnnotationTerminals::ID_RBRACKET);
                    break;

                case ',':
                    token = new Token(AnnotationTerminals::ID_COMMA);
                    break;

                case '\"':
                    token = new Token(AnnotationTerminals::ID_QUOTE);
                    readingString_ = true;
                    break;

                case '\'':
                    token = new Token(AnnotationTerminals::ID_APOSTROPHE);
                    break;
            }   // switch (peek_)

            if (token == 0) {
                if (isAlpha(peek_))
                    token = scanIdentifier(AnnotationTerminals::ID_IDENTIFIER);
                else if (isDigit(peek_) || (peek_ == '.'))
                    token = scanNumber();
            }
        } while (token == 0);
    } catch (std::runtime_error&) {
        if (token != 0) {
            token->setLineNumber(tokenLine);
            token->setCharNumber(tokenStart);
        }

        return token;
    }

    if (token != 0) {
        token->setLineNumber(tokenLine);
        token->setCharNumber(tokenStart);
    }
    return token;
}
Пример #9
0
// private methods
//
Token* PlotFunctionLexer::nextToken() {
    Token* token = 0;
    int tokenLine = getCurrentLineNumber();
    int tokenStart = getCurrentCharNumberInLine();

    try {
        do {
            // Skip whitespace including newlines
            //
            do {
                readChar();
            } while (isWhitespace(peek_));

            switch (peek_) {
            case '.':
                if (isDigit(nextChar()))
                    token = scanNumber();
                else
                    token = new Token(PlotFunctionTerminals::ID_DOT);
                break;  // case '.'

            case '+':
                token = new OperatorToken(PlotFunctionTerminals::ID_PLUS,peek_);
                break;  // case '+'

            case '-':
                token = new OperatorToken(PlotFunctionTerminals::ID_DASH,peek_);
                break;  // case '-'

            case '/':
                token = new OperatorToken(PlotFunctionTerminals::ID_SLASH,peek_);
                break;  // case '/'

            case '*':
                token = new OperatorToken(PlotFunctionTerminals::ID_STAR,peek_);
                break;  // case '*'

            case '[':
                token = new BracketToken(PlotFunctionTerminals::ID_LBRACKET,peek_);
                break;  // case '['

            case ']':
                token = new BracketToken(PlotFunctionTerminals::ID_RBRACKET,peek_);
                break;  // case ']'

            case '(':
                token = new BracketToken(PlotFunctionTerminals::ID_LPAREN,peek_);
                break;  // case '('

            case ')':
                token = new BracketToken(PlotFunctionTerminals::ID_RPAREN,peek_);
                break;  // case ')'

            case '^':
                token = new OperatorToken(PlotFunctionTerminals::ID_CARET,peek_);
                break;  // case '^'

            case ':':
                token = new IsolatorToken(PlotFunctionTerminals::ID_COLON,peek_);
                break;  // case ':'

            case ';':
                token = new IsolatorToken(PlotFunctionTerminals::ID_SEMICOLON,peek_);
                break;  // case ';'

            case ',':
                token = new IsolatorToken(PlotFunctionTerminals::ID_COMMA,peek_);
                break;  // case ','

            case '|':
                token = new IsolatorToken(PlotFunctionTerminals::ID_VERTICAL_BAR,peek_);
                break;  // case '|'

            }   // switch (peek_)

            if (token == 0) {
                if (isAlpha(peek_))
                    token = scanIdentifier();
                else if (isDigit(peek_) || (peek_ == '.'))
                    token = scanNumber();
            }
        } while (token == 0);
    } catch (std::runtime_error&) {
        if (token != 0) {
            token->setLineNumber(tokenLine);
            token->setCharNumber(tokenStart);
        }
        else if (tokenStart == 0)
            token = new IdentifierToken(PlotFunctionTerminals::ID_EMPTY,""); // EMPTY Expression

        return token;
    }

    if (token != 0) {
        token->setLineNumber(tokenLine);
        token->setCharNumber(tokenStart);
    }
    return token;
}