/*
 * Parse a parenthesized group, optionally followed by a repetition
 * specifier.
 *
 * On entry info->str points at the '('.  The group body is handed to
 * ParseAlternatives() and is bracketed in the output by A_LPAREN / A_RPAREN,
 * each followed by the group number.  ErrUnmatchedLparen is reported when
 * the input ends right after '(' or when no ')' follows the group body.
 */
static void ParseParen(ParseInfo *info)
{
    /* Parser state as it was before the group; handed to ParseRepetition()
       once the whole group has been emitted. */
    int startBufInd = info->bufInd;
    int savedMustStr = info->mustStr;
    int savedMustStrBack = info->mustStrBack;
    int savedMinLen = info->minLen;
    int savedMaxLen = info->maxLen;
    int group;

    /* Step over '(' and make sure something follows it. */
    if (++info->str == info->strEnd) {
        AGenerateError(info, ErrUnmatchedLparen);
        return;
    }

    group = info->numParen++;

    Emit(info, A_LPAREN);
    Emit(info, group);

    /* The expression between the parentheses. */
    ParseAlternatives(info);

    Emit(info, A_RPAREN);
    Emit(info, group);

    /* Only the first ten groups are recorded in the parenFlags bit mask. */
    if (group < 10)
        info->parenFlags |= 1 << group;

    /* Consume the matching ')', or complain if it is missing. */
    if (info->str != info->strEnd && *info->str == ')')
        info->str++;
    else
        AGenerateError(info, ErrUnmatchedLparen);

    ParseRepetition(info, savedMinLen, savedMaxLen,
                    info->bufInd - startBufInd,
                    savedMustStr, savedMustStrBack);
}
// Variable Repetition rule? // <a>*<b>element A to B instances // <a>element A instances // *element Zero or more instances. // EXPORT_OUT bool Object::ParseRepetition(char ** ParseAt) { size_t TheMin = 0; size_t TheMax = 0; bool Repetition = ParseRepetition(ParseAt, TheMin, TheMax); this->Min = TheMin; this->Max = TheMax; return(Repetition); }
/*
 * Parse a run of simple and parenthesized regular expressions concatenated
 * together. Plain characters, dots and character classes (optionally
 * followed by a repetition specifier) are considered simple.
 *
 * Input is consumed from info->str until info->strEnd is reached, or until
 * a ')' or '|' is seen; that character is left unconsumed for the caller.
 * Output is produced through Emit() and the helper parsers. Along the way
 * the function updates info->minLen, info->maxLen, info->startChar,
 * info->mustStr and info->mustStrBack.
 *
 * Consecutive plain literals that are not followed by a repetition operator
 * are merged in the output buffer into a single literal string (see the
 * default case below).
 *
 * IDEA: \< and friends
 */
static void ParseConcat(ParseInfo *info)
{
    int litLen;       /* Number of consecutive plain literals seen so far. */
    ABool isPrevLit;  /* TRUE if the previous item was a plain literal. */

    litLen = 0;
    isPrevLit = FALSE;

    while (info->str < info->strEnd) {
        /* A non-literal item breaks the current run of literals. */
        if (!isPrevLit)
            litLen = 0;
        isPrevLit = FALSE;

        switch (*info->str) {
        case '(':
            ParseParen(info);
            break;

        case ')':
        case '|':
            /* End of this concatenation; the caller handles the character. */
            return;

        case '^':
            /* Beginning of line */
            Emit(info, A_BOL_MULTI);
            info->str++;
            break;

        case '$':
            /* End of line */
            Emit(info, A_EOL_MULTI);
            info->str++;
            break;

        case '.':
            /* Any character */
            /* While nothing is required to match before this point
               (minLen == 0), every byte of startChar is set to 0xff --
               presumably marking every character as a possible first
               character of a match. */
            if (info->minLen == 0)
                memset(info->startChar, 0xff, sizeof(info->startChar));
            info->str++;
            /* IDEA: Perhaps ANY_ALL (i.e. match newlines)? */
            ParseSimpleRepetition(info, A_ANY);
            break;

        case '[': {
            /* Character class */
            ABool complement = FALSE;   /* TRUE for a [^...] class. */
            AReOpcode set[A_SET_SIZE];  /* Bit set built up for the class. */
            AWideChar ch;
            int i;
            WideCharSet wset;           /* Ranges passed to AddToWideSet(). */
            int flags = 0;              /* A_WS_* flags for EmitCharSet(). */

            InitWideSet(&wset);

            info->str++;

            /* Complement set? */
            if (info->str < info->strEnd && *info->str == '^') {
                info->str++;
                complement = TRUE;
            }

            /* End of expression? */
            /* NOTE(review): whether AGenerateError() returns is not visible
               here. If it does return, parsing continues below with
               info->str == info->strEnd -- confirm ParseChar() copes. */
            if (info->str == info->strEnd)
                AGenerateError(info, ErrUnmatchedLbracket);

            memset(set, 0, sizeof(set));

            /* One iteration per class member: a character class escape, a
               range lo-hi, or a single character. */
            do {
                AReOpcode code;

                ch = ParseChar(info, &code);

                if (info->str == info->strEnd)
                    break;

                if (code >= CC) {
                    /* ParseChar() reported a character class code: OR the
                       predefined class (or its bitwise complement when
                       CC_COMP is set) into the set. This inner i shadows
                       the outer one. */
                    int i;
                    const AReOpcode *chClass =
                        ACharClass[(code - CC) & ~CC_COMP];
                    for (i = 0; i < A_SET_SIZE; i++) {
                        if (code & CC_COMP)
                            set[i] |= ~chClass[i];
                        else
                            set[i] |= chClass[i];
                    }

                    if (code == CC_W)
                        flags |= A_WS_WORD_CHAR;
                    else if (code == (CC_W | CC_COMP))
                        flags |= A_WS_NOT_WORD_CHAR;

                    /* Underline character is part of the \w set. */
                    if (code == CC_W || code == (CC_W | CC_COMP))
                        AToggleInSet(set, '_');

                    /* A class escape cannot be the low end of a range. */
                    if (*info->str == '-')
                        AGenerateError(info, ErrInvalidCharacterSet);
                } else {
                    /* Character range? */
                    if (*info->str == '-') {
                        AWideChar hiChar = ch;

                        /* Skip '-', check end of expression. */
                        if (++info->str == info->strEnd)
                            break;

                        /* A '-' directly before ']' is a literal '-': it is
                           added to the set and hiChar stays equal to ch, so
                           the code below adds just ch. Otherwise the next
                           character is the upper bound of the range. */
                        if (*info->str == ']')
                            AAddToSet(set, '-');
                        else {
                            hiChar = ParseChar(info, &code);
                            if (code >= CC)
                                AGenerateError(info, ErrInvalidCharacterSet);
                        }

                        AddToWideSet(info, &wset, ch, hiChar);
                        /* Only code points up to 255 go into the bit set. */
                        for (; ch <= AMin(hiChar, 255); ch++)
                            AAddToSet(set, ch);
                    } else {
                        /* Single character. */
                        AAddWideToSet(set, ch);
                        AddToWideSet(info, &wset, ch, ch);
                    }
                }
            } while (info->str < info->strEnd && *info->str != ']');

            if (info->str == info->strEnd)
                AGenerateError(info, ErrUnmatchedLbracket);

            /* Skip ']'. */
            /* NOTE(review): if AGenerateError() above returns normally,
               this moves info->str one past info->strEnd -- confirm. */
            info->str++;

            /* Case-insensitive matching: for every member among code points
               0..255, add its lower and upper case forms as well. */
            if (info->flags & A_RE_NOCASE) {
                for (i = 0; i < 256; i++)
                    if (AIsInSet(set, i)) {
                        AAddToSet(set, ALower(i));
                        AAddToSet(set, AUpper(i));
                    }
            }

            AddStartSet(info, set, complement);
            ParseSimpleRepetition(info, A_SET);
            EmitCharSet(info, set, &wset, complement, flags);

            FreeWideSet(&wset);

            break;
        }

        case '*':
        case '+':
        case '?':
        case '{':
            /* A repetition operator with nothing to repeat. Moving str to
               strEnd makes the enclosing while loop terminate. */
            AGenerateError(info, ErrInvalidRepeat);
            info->str = info->strEnd;
            break;

        default: {
            AReOpcode code;
            AWideChar ch;

            ch = ParseChar(info, &code);

            /* Special character? */
            if (code != A_EMPTY) {
                if (code < CC) {
                    /* An opcode that is emitted as is. */
                    Emit(info, code);
                    if (code == A_BACKREF || code == A_BACKREF_I) {
                        /* Clearing bits 0x30 turns an ASCII digit into its
                           numeric value -- presumably ch is the digit that
                           followed the backslash; confirm in ParseChar(). */
                        int num = ch & ~48;
                        int oldMaxLen = info->maxLen;

                        /* The group must already have its bit set in
                           parenFlags. */
                        if (!(info->parenFlags & (1 << num)))
                            AGenerateError(info, ErrInvalidBackReference);

                        Emit(info, num);

                        info->maxLen *= 2;

                        /* 2 is the number of opcodes emitted for the back
                           reference (the opcode and the group number). */
                        ParseRepetition(info, info->minLen, oldMaxLen, 2,
                                        info->mustStr, info->mustStrBack);
                    }
                } else {
                    /* A character class escape outside brackets: emit it as
                       a character set copied from the predefined table. */
                    AReOpcode set[A_SET_SIZE];
                    int flags = 0;

                    /* NOTE(review): the size is hard-coded as 32 bytes,
                       whereas the bracket case uses sizeof(set) -- confirm
                       that sizeof(set) == 32. */
                    memcpy(set, ACharClass[(code - CC) & ~CC_COMP], 32);

                    /* Underline character is part of the \w set. */
                    if (code == CC_W || code == (CC_W | CC_COMP))
                        AToggleInSet(set, '_');

                    AddStartSet(info, set, code & CC_COMP);
                    ParseSimpleRepetition(info, A_SET);

                    /* Both the plain and the complemented word class use
                       A_WS_WORD_CHAR here; the complement is passed to
                       EmitCharSet() separately as code & CC_COMP. */
                    if (code == CC_W || code == (CC_W | CC_COMP))
                        flags = A_WS_WORD_CHAR;

                    EmitCharSet(info, set, NULL, code & CC_COMP, flags);
                }
                break;
            }

            /* Plain literal character. Pick the literal opcode and, while
               nothing is required to match before this point
               (minLen == 0), add the character to startChar. */
            if (info->flags & A_RE_NOCASE) {
                code = A_LITERAL_I;
                ch = ALower(ch);
                if (info->minLen == 0) {
                    AAddWideToSet(info->startChar, ch);
                    AAddWideToSet(info->startChar, AUpper(ch));
                }
            } else {
                code = A_LITERAL;
                if (info->minLen == 0)
                    AAddWideToSet(info->startChar, ch);
            }

            if (info->str == info->strEnd ||
                (*info->str != '*' && *info->str != '+' &&
                 *info->str != '?' && *info->str != '{')) {
                /* The literal is not followed by a repetition operator, so
                   it matches exactly once and may be merged with
                   neighbouring literals. */
                info->minLen++;
                info->maxLen++;

                litLen++;
                isPrevLit = TRUE;

                if (litLen == 2) {
                    /* Convert a single character to literal string. */
                    /* The previous literal occupies the last two buffer
                       slots as (opcode, char). They are rewritten in place
                       to (opcode + A_STRING, length 2) and followed by the
                       two characters. */
                    AWideChar prevCh;

                    prevCh = info->buf[info->bufInd - 1];

                    info->buf[info->bufInd - 2] += A_STRING;
                    info->buf[info->bufInd - 1] = 2;
                    Emit(info, prevCh);
                    Emit(info, ch);

                    /* Record this string only if none is recorded yet.
                       mustStr is the buffer index of the first character of
                       the string. */
                    if (info->mustStr == 0) {
                        info->mustStr = info->bufInd - litLen;
                        info->mustStrBack = info->maxLen - 2; /* FIX? */
                    }
                } else if (litLen > 2) {
                    /* Add a character to a literal string. */
                    Emit(info, ch);

                    /* buf[mustStr - 1] is the length slot of the recorded
                       string (for strings recorded by this function). If it
                       is not longer than the current string, the current
                       string becomes the recorded one. */
                    if (info->buf[info->mustStr - 1] <= litLen) {
                        info->mustStr = info->bufInd - litLen;
                        info->mustStrBack = info->maxLen - litLen; /* FIX? */
                    }

                    /* Update the length slot of the current string. */
                    info->buf[info->bufInd - litLen - 1] = litLen;
                } else {
                    /* First literal of a run: emit as (opcode, char). */
                    Emit(info, code);
                    Emit(info, ch);
                }
            } else {
                /* A repetition operator follows: pass the literal opcode to
                   ParseSimpleRepetition() and then emit the character. */
                ParseSimpleRepetition(info, code);
                Emit(info, ch);
            }

            break;
        }
        }
    }
}
bool ParseEvent(Event *Ev, Expr *E, Assertion *A, vector<ValueDecl*>& References, ASTContext& Ctx) { E = E->IgnoreImplicit(); if (auto Ref = dyn_cast<DeclRefExpr>(E)) { auto D = Ref->getDecl(); assert(D); // The __tesla_ignore "event" helps TESLA assertions look like ISO C. if (D->getName() == "__tesla_ignore") { Ev->set_type(Event::IGNORE); return true; } // The only other static __tesla_event is the "now" event. if (D->getName() != "__tesla_now") { Report("TESLA static reference must be __tesla_ignore or __tesla_now", E->getLocStart(), Ctx) << E->getSourceRange(); return false; } Ev->set_type(Event::NOW); *Ev->mutable_now()->mutable_location() = A->location(); return true; } else if (auto Bop = dyn_cast<BinaryOperator>(E)) { // This is a call-and-return like "foo(x) == y". Ev->set_type(Event::FUNCTION); return ParseFunctionCall(Ev->mutable_function(), Bop, References, Ctx); } // Otherwise, it's a call to a TESLA "function" like __tesla_predicate(). auto Call = dyn_cast<CallExpr>(E); if (!Call) { Report("Event should look like a function call", E->getLocStart(), Ctx) << E->getSourceRange(); return false; } auto Callee = Call->getDirectCallee(); if (!Callee) { Report("TESLA event referenced indirectly", Call->getLocStart(), Ctx) << Call->getSourceRange(); return false; } if (Callee->getName() == "__tesla_repeat") { Ev->set_type(Event::REPETITION); return ParseRepetition(Ev->mutable_repetition(), Call, A, References, Ctx); } typedef bool (*FnEventParser)(FunctionEvent*, CallExpr*, vector<ValueDecl*>&, ASTContext&); FnEventParser Parser = llvm::StringSwitch<FnEventParser>(Callee->getName()) .Case("__tesla_entered", &ParseFunctionEntry) .Case("__tesla_leaving", &ParseFunctionExit) .Case("__tesla_call", &ParseFunctionCall) .Default(NULL); if (!Parser) { Report("Unknown TESLA event", E->getLocStart(), Ctx) << E->getSourceRange(); return false; } Ev->set_type(Event::FUNCTION); return Parser(Ev->mutable_function(), Call, References, Ctx); }