void FMTLexer::mDIGITS(bool _createToken) { int _ttype; antlr::RefToken _token; std::string::size_type _begin = text.length(); _ttype = DIGITS; std::string::size_type _saveIndex; { // ( ... )+ int _cnt138=0; for (;;) { if (((LA(1) >= 0x30 /* '0' */ && LA(1) <= 0x39 /* '9' */ ))) { matchRange('0','9'); } else { if ( _cnt138>=1 ) { goto _loop138; } else {throw antlr::NoViableAltForCharException(LA(1), getFilename(), getLine(), getColumn());} } _cnt138++; } _loop138:; } // ( ... )+ if ( _createToken && _token==antlr::nullToken && _ttype!=antlr::Token::SKIP ) { _token = makeToken(_ttype); _token->setText(text.substr(_begin, text.length()-_begin)); } _returnToken = _token; _saveIndex=0; }
/*
 * Dispatch one regex node to the matcher that handles its type and
 * return that matcher's result.
 *
 *   POINT   -> non-zero while the current position is before the end
 *              of the text (text.pos < text.len)
 *   SET     -> matchSet     (the node is passed by value)
 *   BACKREF -> matchBackRef
 *   RANGEAB -> matchRange   on the character at the current position
 *   META    -> matchMeta    on the character at the current position
 *   other   -> matchText    on the text starting at the current position
 */
static int match( struct RE *rexp ){
  if( rexp->type == POINT   ) return text.pos < text.len;
  if( rexp->type == SET     ) return matchSet    ( *rexp );
  if( rexp->type == BACKREF ) return matchBackRef( rexp );
  if( rexp->type == RANGEAB ) return matchRange  ( rexp, text.ptr[text.pos] );
  if( rexp->type == META    ) return matchMeta   ( rexp, text.ptr[text.pos] );

  return matchText( rexp, text.ptr + text.pos );
}
void RxCompile::charClass() { bool negate = match('^'); gcstring chars = ""; if (match(']')) chars += "]"; CharMatcher* cm = &CharMatcher::NONE; while (si < sn && src[si] != ']') { CharMatcher* elem; if (matchRange()) { unsigned from = src[si - 3]; unsigned to = src[si - 1]; elem = (from < to) ? CharMatcher::inRange(from, to) : &CharMatcher::NONE; } else if (match("\\d")) elem = digit; else if (match("\\D")) elem = notDigit; else if (match("\\w")) elem = word; else if (match("\\W")) elem = notWord; else if (match("\\s")) elem = space; else if (match("\\S")) elem = notSpace; else if (match("[:")) elem = posixClass(); else { if (si + 1 < sn) match("\\"); chars += gcstring(src + si, 1); si++; continue; } cm = cm->or_(elem); } if (!negate && cm == &CharMatcher::NONE && chars.size() == 1) { emitChars(chars.ptr(), 1); return; } if (chars.size() > 0) cm = cm->or_(CharMatcher::anyOf(chars)); if (negate) cm = cm->negate(); emit(new CharClass(cm, ignoringCase)); }
void FMTLexer::mCHAR(bool _createToken) { int _ttype; antlr::RefToken _token; std::string::size_type _begin = text.length(); _ttype = CHAR; std::string::size_type _saveIndex; { matchRange('\3',static_cast<unsigned char>('\377')); } if ( _createToken && _token==antlr::nullToken && _ttype!=antlr::Token::SKIP ) { _token = makeToken(_ttype); _token->setText(text.substr(_begin, text.length()-_begin)); } _returnToken = _token; _saveIndex=0; }
void PredicateParser::parse(const std::string& fullExpression, size_t& fromPos, size_t endPos) { assert(0 != fromPos); std::stack<char> unmatched; if(mRootParser) { unmatched.push('{'); ++fromPos; } fromPos = skipSpace(fullExpression, fromPos, endPos); auto oldFrom = fromPos; size_t outerSize = unmatched.size(); for(; fromPos < endPos && !(mRootParser && unmatched.empty()); ++fromPos) { char c = fullExpression.at(fromPos); if(!matchRange(unmatched, fullExpression, fromPos, endPos) && outerSize == unmatched.size()) { switch(c) { case '=': parseEqual(fullExpression, fromPos); break; case '!': parseNonEqual(fullExpression, fromPos); break; case '>': parseGreat(fullExpression, fromPos); break; case '<': parseLess(fullExpression, fromPos); break; case '^': parseStartsWith(fullExpression, fromPos); break; case '$': parseEndsWith(fullExpression, fromPos); break; case '*': parseContains(fullExpression, fromPos); break; case '~': parseMatch(fullExpression, fromPos); break; case '&': parseAnd(fullExpression, fromPos); break; case '|': parseOr(fullExpression, fromPos); break; case '+': mOperators.emplace_back(OpInfo{OpInfo::Add, fromPos, fromPos + 1, mOperators.size()}); break; case '-': { size_t lastOpEnd = mOperators.empty() ? oldFrom : mOperators.back().to; int from = fromPos - 1; bool isMinus = isSpace(fullExpression, from , lastOpEnd - 1, -1); if(isMinus) { mOperators.emplace_back(OpInfo{OpInfo::Minus, fromPos, fromPos + 1, mOperators.size()}); } else { mOperators.emplace_back(OpInfo{OpInfo::Sub, fromPos, fromPos + 1, mOperators.size()}); } break; } case '/': { auto lastOp = mOperators.empty() ? nullptr : &mOperators.back(); size_t lastOpEnd = nullptr == lastOp ? 
oldFrom : lastOp->to; int from = fromPos - 1; bool allSpace = isSpace(fullExpression, from , lastOpEnd - 1, -1); if(nullptr != lastOp && (lastOp->op >= OpInfo::Match) && (lastOp->op <= OpInfo::iNotMatch) && lastOp->to == (from - 1)) { } if((oldFrom != fromPos) && (mOperators.empty() || (mOperators.back().op < OpInfo::Match) && (mOperators.back().op > OpInfo::iNotMatch))) { mOperators.emplace_back(OpInfo{OpInfo::Div, fromPos, fromPos + 1, mOperators.size()}); } else { fromPos = skip2(fullExpression, fromPos + 1, '/', endPos) + 1; } } break; case '%': mOperators.emplace_back(OpInfo{OpInfo::Mod, fromPos, fromPos + 1, mOperators.size()}); break; default: break; } }//match range }//foreach char auto newEnd = fromPos - (mRootParser ? 1 : 0); if(mOperators.empty() && '(' != fullExpression.at(oldFrom)) { auto pOperand = createPrimitive(fullExpression, oldFrom, newEnd); mResult = pOperand; } else { auto pPredicate = createPredicate(fullExpression, 0, mOperators.size(), oldFrom, newEnd); mResult = pPredicate; } }
/**
 * Runs the operation chain that starts at `operations` against `context`,
 * beginning at `offset`.
 *
 * The chain is walked in a loop; operations that own a sub-chain
 * (closures, optionals, look-around, independent groups) recurse into
 * this function. `offset` is a by-value parameter, so a recursive call
 * never changes the caller's offset directly.
 *
 * @param context    match state; this function reads fStart, fLimit and
 *                   fMatch and reads/writes fOffsets.
 * @param operations first operation to execute; 0 ends the chain.
 * @param offset     position at which matching starts.
 * @param direction  forwarded to the helper matchers; the look-ahead cases
 *                   pass 1 and the look-behind cases pass -1 instead.
 * @return the offset reached when the chain is exhausted, or a negative
 *         value (-1 in every case decided locally) when matching fails.
 */
int RegularExpression::match(Context* const context, const Op* const operations , int offset, const short direction) {
    const Op* tmpOp = operations;
    bool ignoreCase = isSet(fOptions, IGNORE_CASE);

    while (true) {
        // End of chain: everything matched.
        if (tmpOp == 0) break;
        // Fail once the offset is outside [fStart, fLimit].
        if (offset > context->fLimit || offset < context->fStart) return -1;

        switch(tmpOp->getOpType()) {
        // Simple matchers: fail on a mismatch, otherwise move to the next op.
        // offset is never assigned in these branches, so any advance has to
        // come from the helper (presumably through a reference parameter -
        // confirm against the helper declarations).
        case Op::O_CHAR:
            if (!matchChar(context, tmpOp->getData(), offset, direction, ignoreCase)) return -1;
            tmpOp = tmpOp->getNextOp();
            break;
        case Op::O_DOT:
            if (!matchDot(context, offset, direction)) return -1;
            tmpOp = tmpOp->getNextOp();
            break;
        case Op::O_RANGE:
        case Op::O_NRANGE:
            if (!matchRange(context, tmpOp, offset, direction, ignoreCase)) return -1;
            tmpOp = tmpOp->getNextOp();
            break;
        case Op::O_ANCHOR:
            if (!matchAnchor(context, tmpOp->getData(), offset)) return -1;
            tmpOp = tmpOp->getNextOp();
            break;
        case Op::O_BACKREFERENCE:
            if (!matchBackReference(context, tmpOp->getData(), offset, direction, ignoreCase)) return -1;
            tmpOp = tmpOp->getNextOp();
            break;
        case Op::O_STRING:
            if (!matchString(context, tmpOp->getLiteral(), offset, direction, ignoreCase)) return -1;
            tmpOp = tmpOp->getNextOp();
            break;
        case Op::O_CLOSURE:
            {
                XMLInt32 id = tmpOp->getData();
                if (id >= 0) {
                    // fOffsets[id] holds the offset at which this closure was
                    // last entered (-1 when unset). Arriving at the same
                    // offset again clears the slot and skips the closure.
                    // NOTE(review): this looks like a guard against looping
                    // on a body that consumes nothing - confirm.
                    int prevOffset = context->fOffsets[id];
                    if (prevOffset < 0 || prevOffset != offset) {
                        context->fOffsets[id] = offset;
                    } else {
                        context->fOffsets[id] = -1;
                        tmpOp = tmpOp->getNextOp();
                        break;
                    }
                }
                // Try the child chain first; if it fails, carry on with the
                // operation after the closure.
                int ret = match(context, tmpOp->getChild(), offset, direction);
                if (id >= 0) {
                    context->fOffsets[id] = -1;
                }
                if (ret >= 0) return ret;
                tmpOp = tmpOp->getNextOp();
            }
            break;
        case Op::O_QUESTION:
            {
                // Try the child chain first, then fall back to the next op.
                int ret = match(context, tmpOp->getChild(), offset, direction);
                if (ret >= 0) return ret;
                tmpOp = tmpOp->getNextOp();
            }
            break;
        case Op::O_NONGREEDYCLOSURE:
        case Op::O_NONGREEDYQUESTION:
            {
                // Reverse order: try the continuation first and only enter
                // the child chain if that fails.
                int ret = match(context,tmpOp->getNextOp(),offset,direction);
                if (ret >= 0) return ret;
                tmpOp = tmpOp->getChild();
            }
            break;
        case Op::O_UNION:
            {
                // The result of matchUnion is the result of this call.
                return matchUnion(context, tmpOp, offset, direction);
            }
        case Op::O_CAPTURE:
            // Delegated to matchCapture only when the context has a match
            // object and the op's data is non-zero; otherwise the op is
            // stepped over.
            if (context->fMatch != 0 && tmpOp->getData() != 0)
                return matchCapture(context, tmpOp, offset, direction);
            tmpOp = tmpOp->getNextOp();
            break;
        // Look-around: the child chain is run from the current offset with a
        // fixed direction; only its success or failure is used and offset is
        // left unchanged.
        case Op::O_LOOKAHEAD:
            if (0 > match(context, tmpOp->getChild(), offset, 1)) return -1;
            tmpOp = tmpOp->getNextOp();
            break;
        case Op::O_NEGATIVELOOKAHEAD:
            if (0 <= match(context, tmpOp->getChild(), offset, 1)) return -1;
            tmpOp = tmpOp->getNextOp();
            break;
        case Op::O_LOOKBEHIND:
            if (0 > match(context, tmpOp->getChild(), offset, -1)) return - 1;
            tmpOp = tmpOp->getNextOp();
            break;
        case Op::O_NEGATIVELOOKBEHIND:
            if (0 <= match(context, tmpOp->getChild(), offset, -1)) return -1;
            tmpOp = tmpOp->getNextOp();
            break;
        case Op::O_INDEPENDENT:
        case Op::O_MODIFIER:
            {
                // On success the returned offset becomes the current offset
                // and matching continues with the next op; a negative result
                // is returned unchanged.
                int ret = (tmpOp->getOpType() == Op::O_INDEPENDENT)
                    ? match(context, tmpOp->getChild(), offset, direction)
                    : matchModifier(context, tmpOp, offset, direction);
                if (ret < 0) return ret;
                offset = ret;
                tmpOp = tmpOp->getNextOp();
            }
            break;
        case Op::O_CONDITION:
            // A reference number at or beyond fNoGroups fails the match.
            if (tmpOp->getRefNo() >= fNoGroups) return -1;
            // Continue with the yes-flow when the condition holds, else with
            // the no-flow if there is one, else with the next op.
            if (matchCondition(context, tmpOp, offset, direction))
                tmpOp = tmpOp->getYesFlow();
            else if (tmpOp->getNoFlow() != 0)
                tmpOp = tmpOp->getNoFlow();
            else
                tmpOp = tmpOp->getNextOp();
            break;
        }
    }

    return offset;
}
/**
 * Lexer implementation of matchc; it overrides any base implementation
 * in the base recognizer.
 *
 * Matching a single character is expressed as matching the range whose
 * lower and upper bound are both \p c.
 *
 * \remark
 * The generated code lays down arrays of ints for constant strings, so
 * they are in UTF32 form.
 */
bool Lexer::matchc(Char c)
{
    const bool matched = matchRange(c, c);
    return matched;
}