/*
 * rule_list -- parser rule written against the zz* parser macros (zzRULE,
 * zzBLOCK, zzMake0, zzaRet, zzaArg, zzLOOP, zzEXIT, ...), none of which are
 * defined in this view.
 *
 * What the visible code does:
 *   - If setwd1[LA(1)] has bit 0x8 set: calls rule() and copies its l/r
 *     attribute fields into zzaRet.  Then, while setwd1[LA(1)] has bit 0x10
 *     set, calls rule() again and allocates a node t1 whose trans[0] is the
 *     previous zzaRet.l and whose trans[1] is the new rule's .l; zzaRet.l
 *     becomes t1 and zzaRet.r is set to NULL.
 *   - Else if setwd1[LA(1)] has bit 0x20 set: zzaRet.l is a fresh node,
 *     zzaRet.r is NULL, and warning("no regular expressions", zzline) is
 *     issued.
 *   - Otherwise: zzFAIL(...) followed by goto fail, where zzsyn(...) and
 *     zzresynch(setwd1, 0x40) are called.
 *
 * NOTE(review): the line starts with "rule_list() #endif", which looks like
 * the tail of a prototype / K&R #ifdef pair whose opening part (return type,
 * #ifdef, #else) is not visible here -- confirm the header is intact in the
 * real file; a mid-line #endif is not valid C.
 */
rule_list() #endif { zzRULE; zzBLOCK(zztasp1); zzMake0; { if ( (setwd1[LA(1)]&0x8) ) { rule(); zzaRet.l=zzaArg(zztasp1,1 ).l; zzaRet.r=zzaArg(zztasp1,1 ).r; { zzBLOCK(zztasp2); zzMake0; { while ( (setwd1[LA(1)]&0x10) ) { rule(); {nfa_node *t1; t1 = new_nfa_node(); (t1)->trans[0]=zzaRet.l; (t1)->trans[1]=zzaArg(zztasp2,1 ).l; /* all accept nodes "dead ends" */ zzaRet.l=t1; zzaRet.r=NULL; } zzLOOP(zztasp2); } zzEXIT(zztasp2); } } } else { if ( (setwd1[LA(1)]&0x20) ) { zzaRet.l = new_nfa_node(); zzaRet.r = NULL; warning("no regular expressions", zzline); } else {zzFAIL(1,zzerr2,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;} } zzEXIT(zztasp1); return; fail: zzEXIT(zztasp1); zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); zzresynch(setwd1, 0x40); } }
/*
 * reg_expr -- parser rule for an alternation: and_expr ( OR and_expr )*.
 *
 * What the visible code does:
 *   - Calls and_expr() and copies its l/r attribute fields into zzaRet.
 *   - While LA(1) == OR: matches and consumes OR, calls and_expr() again,
 *     then allocates two nodes t1 and t2.  t1->trans[0] is the previous
 *     zzaRet.l and t1->trans[1] is the new operand's .l.  If the previous
 *     zzaRet.r is non-NULL its trans[1] is pointed at t2, and likewise for
 *     the new operand's .r when it is non-NULL.  zzaRet then becomes (t1, t2).
 *   - The fail: label calls zzsyn(...) and zzresynch(setwd2, 0x4).
 *
 * NOTE(review): no explicit "goto fail" appears in this rule; presumably the
 * jump is issued from inside zzmatch() -- confirm against the macro
 * definition.
 * NOTE(review): the line starts with "reg_expr() #endif"; the opening part of
 * the function header is not visible here -- confirm it is intact in the real
 * file.
 */
reg_expr() #endif { zzRULE; zzBLOCK(zztasp1); zzMake0; { and_expr(); zzaRet.l=zzaArg(zztasp1,1 ).l; zzaRet.r=zzaArg(zztasp1,1 ).r; { zzBLOCK(zztasp2); zzMake0; { while ( (LA(1)==OR) ) { zzmatch(OR); zzCONSUME; and_expr(); {nfa_node *t1, *t2; t1 = new_nfa_node(); t2 = new_nfa_node(); (t1)->trans[0]=zzaRet.l; (t1)->trans[1]=zzaArg(zztasp2,2 ).l; /* MR23 */ if (zzaRet.r != NULL) (zzaRet.r)->trans[1]=t2; if (zzaArg(zztasp2,2 ).r) { (zzaArg(zztasp2,2 ).r)->trans[1]=t2; /* MR20 */ } zzaRet.l=t1; zzaRet.r=t2; } zzLOOP(zztasp2); } zzEXIT(zztasp2); } } zzEXIT(zztasp1); return; fail: zzEXIT(zztasp1); zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); zzresynch(setwd2, 0x4); } }
/*
 * expr -- parser rule for a primary regular-expression element.
 *
 * zzaRet.l and zzaRet.r are each set to a fresh node before any token is
 * examined.  The branch is then chosen from LA(1):
 *   - L_BRACK atom_list R_BRACK: l->trans[0] = r; l->label is a set_dup() of
 *     the atom_list label, which is also OR-ed into used_chars.
 *   - NOT L_BRACK atom_list R_BRACK: as above, but the label is
 *     set_dif(normal_chars, <atom_list label>).
 *   - L_PAR reg_expr R_PAR: l->trans[0] = sub-expression's .l and the
 *     sub-expression's .r gets trans[1] = r.
 *   - L_BRACE reg_expr R_BRACE: same wiring as L_PAR, plus l->trans[1] = r,
 *     i.e. a second edge from l straight to r.
 *   - setwd2[LA(1)] has bit 0x40: atom(); l->trans[0] = r; l->label is a
 *     set_dup() of the atom label, also OR-ed into used_chars.
 *   - Otherwise: zzFAIL(...) and goto fail, where zzsyn(...) and
 *     zzresynch(setwd2, 0x80) are called.
 *
 * NOTE(review): in the L_PAR and L_BRACE branches the sub-expression's .r is
 * dereferenced without a NULL check, whereas reg_expr tests the same field
 * before using it (see its MR20/MR23 guards) -- confirm .r cannot be NULL
 * here.
 * NOTE(review): the line starts with "expr() #endif"; the opening part of the
 * function header is not visible here -- confirm it is intact in the real
 * file.
 */
expr() #endif { zzRULE; zzBLOCK(zztasp1); zzMake0; { zzaRet.l = new_nfa_node(); zzaRet.r = new_nfa_node(); if ( (LA(1)==L_BRACK) ) { zzmatch(L_BRACK); zzCONSUME; atom_list(); zzmatch(R_BRACK); (zzaRet.l)->trans[0] = zzaRet.r; (zzaRet.l)->label = set_dup(zzaArg(zztasp1,2 ).label); set_orin(&used_chars,(zzaRet.l)->label); zzCONSUME; } else { if ( (LA(1)==NOT) ) { zzmatch(NOT); zzCONSUME; zzmatch(L_BRACK); zzCONSUME; atom_list(); zzmatch(R_BRACK); (zzaRet.l)->trans[0] = zzaRet.r; (zzaRet.l)->label = set_dif(normal_chars,zzaArg(zztasp1,3 ).label); set_orin(&used_chars,(zzaRet.l)->label); zzCONSUME; } else { if ( (LA(1)==L_PAR) ) { zzmatch(L_PAR); zzCONSUME; reg_expr(); zzmatch(R_PAR); (zzaRet.l)->trans[0] = zzaArg(zztasp1,2 ).l; (zzaArg(zztasp1,2 ).r)->trans[1] = zzaRet.r; zzCONSUME; } else { if ( (LA(1)==L_BRACE) ) { zzmatch(L_BRACE); zzCONSUME; reg_expr(); zzmatch(R_BRACE); (zzaRet.l)->trans[0] = zzaArg(zztasp1,2 ).l; (zzaRet.l)->trans[1] = zzaRet.r; (zzaArg(zztasp1,2 ).r)->trans[1] = zzaRet.r; zzCONSUME; } else { if ( (setwd2[LA(1)]&0x40) ) { atom(); (zzaRet.l)->trans[0] = zzaRet.r; (zzaRet.l)->label = set_dup(zzaArg(zztasp1,1 ).label); set_orin(&used_chars,(zzaRet.l)->label); } else {zzFAIL(1,zzerr5,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;} } } } } zzEXIT(zztasp1); return; fail: zzEXIT(zztasp1); zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); zzresynch(setwd2, 0x80); } }
/*
 * repeat_expr -- parser rule for an expr optionally followed by a repetition
 * operator (ZERO_MORE or ONE_MORE).
 *
 * What the visible code does:
 *   - If setwd2[LA(1)] has bit 0x10 set: calls expr() and copies its l/r
 *     attribute fields into zzaRet, then:
 *       * on ZERO_MORE: r->trans[0] = l (edge from the end back to the
 *         start); two new nodes t1/t2 are allocated with t1->trans[0] = l,
 *         t1->trans[1] = t2 and r->trans[1] = t2; zzaRet becomes (t1, t2).
 *       * on ONE_MORE: only r->trans[0] = l is added; zzaRet is unchanged.
 *       * otherwise nothing is added.
 *   - Else, a ZERO_MORE or ONE_MORE with no preceding expression is matched
 *     and reported through error("no expression for *" / "+", zzline).
 *   - Otherwise: zzFAIL(...) and goto fail, where zzsyn(...) and
 *     zzresynch(setwd2, 0x20) are called.
 *
 * NOTE(review): zzaRet.r is dereferenced without a NULL check in both
 * repetition branches -- confirm expr() always yields a non-NULL .r.
 * NOTE(review): the node wiring for ZERO_MORE / ONE_MORE sits between
 * zzmatch() and zzCONSUME, so it presumably runs only after a successful
 * match -- confirm against the zzmatch definition.
 * NOTE(review): the line starts with "repeat_expr() #endif"; the opening part
 * of the function header is not visible here -- confirm it is intact in the
 * real file.
 */
repeat_expr() #endif { zzRULE; zzBLOCK(zztasp1); zzMake0; { if ( (setwd2[LA(1)]&0x10) ) { expr(); zzaRet.l=zzaArg(zztasp1,1 ).l; zzaRet.r=zzaArg(zztasp1,1 ).r; { zzBLOCK(zztasp2); zzMake0; { if ( (LA(1)==ZERO_MORE) ) { zzmatch(ZERO_MORE); { nfa_node *t1,*t2; (zzaRet.r)->trans[0] = zzaRet.l; t1 = new_nfa_node(); t2 = new_nfa_node(); t1->trans[0]=zzaRet.l; t1->trans[1]=t2; (zzaRet.r)->trans[1]=t2; zzaRet.l=t1;zzaRet.r=t2; } zzCONSUME; } else { if ( (LA(1)==ONE_MORE) ) { zzmatch(ONE_MORE); (zzaRet.r)->trans[0] = zzaRet.l; zzCONSUME; } } zzEXIT(zztasp2); } } } else { if ( (LA(1)==ZERO_MORE) ) { zzmatch(ZERO_MORE); error("no expression for *", zzline); zzCONSUME; } else { if ( (LA(1)==ONE_MORE) ) { zzmatch(ONE_MORE); error("no expression for +", zzline); zzCONSUME; } else {zzFAIL(1,zzerr4,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;} } } zzEXIT(zztasp1); return; fail: zzEXIT(zztasp1); zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); zzresynch(setwd2, 0x20); } }
struct nfa regexp_to_nfa(struct Expression* expr, int ref, int level) { struct nfa result = { 0 }; if (expr->typecode != LITERAL_STRING_EXPRESSION_TYPE) { result.start_node = new_nfa_node(); REF_NFA(result.start_node, ref); result.accept_node = new_nfa_node(); REF_NFA(result.accept_node, ref); } switch (expr->typecode) { case REPEATED_EXPRESSION_TYPE: { struct RepeatedExpression* repExpr = (struct RepeatedExpression*)expr; struct nfa_node* start_node = result.start_node; struct nfa_node* end_node = NULL; /* First, reduce to one of {0,0}, {0,1}, {0,}, or {1,} */ int lower_bound = repExpr->lower_bound; int upper_bound = repExpr->upper_bound; while (!((lower_bound == 0 && upper_bound == 0) || (lower_bound == 0 && upper_bound == 1) || (lower_bound == 0 && upper_bound == INFINITY) || (lower_bound == 1 && upper_bound == INFINITY))) { struct nfa nfa_arg_clone = regexp_to_nfa(repExpr->expression_repeated, 0, level + 1); end_node = new_nfa_node(); add_epsilon_edge(start_node, nfa_arg_clone.start_node); REF_NFA(nfa_arg_clone.start_node, 1); add_epsilon_edge(nfa_arg_clone.accept_node, end_node); REF_NFA(end_node, 1); if (lower_bound == 0) { if (repExpr->is_greedy) { start_node->epsilon_next2 = end_node; REF_NFA(start_node->epsilon_next2, 1); } else { /* Give 0 times priority */ start_node->epsilon_next2 = start_node->epsilon_next1; start_node->epsilon_next1 = end_node; REF_NFA(start_node->epsilon_next1, 1); REF_NFA(start_node->epsilon_next2, 1); } } else { lower_bound--; } if (upper_bound != INFINITY) { upper_bound--; } start_node = end_node; } /* Now do {0,0}, {0,1}, {0,}, or {1,} */ if (upper_bound > lower_bound || upper_bound == INFINITY) { struct nfa nfa_arg = regexp_to_nfa(repExpr->expression_repeated, 0, level + 1); end_node = new_nfa_node(); start_node->epsilon_next1 = nfa_arg.start_node; REF_NFA(start_node->epsilon_next1, 1); if (lower_bound == 0) { if (repExpr->is_greedy) { start_node->epsilon_next2 = end_node; REF_NFA(start_node->epsilon_next2, 1); } else { /* Give 
0 times priority */ REF_NFA(start_node->epsilon_next1, -1); start_node->epsilon_next1 = end_node; REF_NFA(start_node->epsilon_next1, 1); start_node->epsilon_next2 = start_node->epsilon_next1; REF_NFA(start_node->epsilon_next2, 1); } } add_epsilon_edge(nfa_arg.accept_node, end_node); REF_NFA(end_node, 1); if (upper_bound == INFINITY) { if (repExpr->is_greedy) { end_node->epsilon_next1 = nfa_arg.start_node; REF_NFA(nfa_arg.start_node, 1); } else { end_node->epsilon_next2 = nfa_arg.start_node; REF_NFA(nfa_arg.start_node, 1); } } } add_epsilon_edge(end_node, result.accept_node); REF_NFA(result.accept_node, 1); break; } case UNION_EXPRESSION_TYPE: { struct UnionExpression* unionExpr = (struct UnionExpression*)expr; struct nfa nfa_left_arg = regexp_to_nfa(unionExpr->left_expression, 1, level + 1); struct nfa nfa_right_arg = regexp_to_nfa(unionExpr->right_expression, 1, level + 1); result.start_node->epsilon_next1 = nfa_left_arg.start_node; result.start_node->epsilon_next2 = nfa_right_arg.start_node; REF_NFA(result.start_node->epsilon_next1, 1); REF_NFA(result.start_node->epsilon_next2, 1); add_epsilon_edge(nfa_left_arg.accept_node, result.accept_node); REF_NFA(result.accept_node, 1); add_epsilon_edge(nfa_right_arg.accept_node, result.accept_node); REF_NFA(result.accept_node, 1); break; } case CONCATENATE_EXPRESSION_TYPE: { struct ConcatenateExpression* concatExpr = (struct ConcatenateExpression*)expr; struct nfa nfa_left_arg = regexp_to_nfa(concatExpr->left_expression, 0, level + 1); struct nfa nfa_right_arg = regexp_to_nfa(concatExpr->right_expression, 0, level + 1); /* Need to use new start/end node because the concatenation might have a separate group number from the components. 
*/ result.start_node->epsilon_next1 = nfa_left_arg.start_node; REF_NFA(result.start_node->epsilon_next1, 1); add_epsilon_edge(nfa_left_arg.accept_node, nfa_right_arg.start_node); REF_NFA(nfa_right_arg.start_node, 1); add_epsilon_edge(nfa_right_arg.accept_node, result.accept_node); REF_NFA(result.accept_node, 1); break; } case CHARSET_EXPRESSION_TYPE: { struct CharSetExpression* charSetExpr = (struct CharSetExpression*)expr; CopyCharSet(result.start_node->following_set, charSetExpr->set); result.start_node->set_next = result.accept_node; REF_NFA(result.start_node->set_next, 1); SetAllCharSet(result.start_node->preceding_set); result.start_node->eat_char = 1; break; } case ZERO_WIDTH_EXPRESSION_TYPE: { struct ZeroWidthExpression* zeroWidthExpr = (struct ZeroWidthExpression*)expr; CopyCharSet(result.start_node->preceding_set, zeroWidthExpr->preceding_set); CopyCharSet(result.start_node->following_set, zeroWidthExpr->following_set); result.start_node->set_next = result.accept_node; REF_NFA(result.start_node->set_next, 1); result.start_node->eat_char = 0; break; } case LITERAL_STRING_EXPRESSION_TYPE: { struct LiteralStringExpression* litStringExpr = (struct LiteralStringExpression*)expr; RegexpTokenType* str = litStringExpr->literal_string; int i; if (str[0] == 0) { /* Must have separate start and end state */ if (result.start_node) { free_nfa_node (result.start_node); result.start_node = NULL; } result.start_node = new_nfa_node(); REF_NFA(result.start_node, ref); if (result.accept_node) { free_nfa_node (result.accept_node); result.accept_node = NULL; } result.accept_node = new_nfa_node(); REF_NFA(result.accept_node, ref); result.start_node->epsilon_next1 = result.accept_node; REF_NFA(result.start_node->epsilon_next1, 1); } else { struct nfa_node* current_node = new_nfa_node(); if (result.start_node) { free_nfa_node (result.start_node); result.start_node = NULL; }; result.start_node = current_node; REF_NFA(result.start_node, ref); for(i=0; str[i] != 0; i++) { struct 
nfa_node* next_node = new_nfa_node(); CharSetInsert(current_node->following_set, (char)str[i]); SetAllCharSet(current_node->preceding_set); current_node->eat_char = 1; current_node->set_next = next_node; REF_NFA(current_node->set_next, 1); current_node = next_node; } if (result.accept_node) { free_nfa_node (result.accept_node); result.accept_node = NULL; } result.accept_node = current_node; REF_NFA(result.accept_node, ref); } break; } } result.start_node->group_start_number = expr->group_number; result.accept_node->group_end_number = expr->group_number; return result; }