Re_node parse_re(char **s, short end) { Stack stk = NULL, temp; Tok_node next_token; Re_node re = NULL; if (s == NULL || *s == NULL) return NULL; while (TRUE) { next_token = get_token(s); if (next_token == NULL) return NULL; switch (tok_type(next_token)) { case RPAREN: retract_token(s); case EOS: if (end == tok_type(next_token)) return Top(cat2(&stk)); else return NULL; case LPAREN: re = parse_re(s, RPAREN); if (Push(&stk, re) == NULL) return NULL; if (tok_type(get_token(s)) != RPAREN || re == NULL) return NULL; if (Size(stk) > 2) { temp = stk->next; stk->next = cat2(&temp); /* condense CAT nodes */ if (stk->next == NULL) return NULL; else stk->size = stk->next->size + 1; } break; case OPSTAR: if (wrap(&stk, OPSTAR) == NULL) return NULL; break; case OPOPT: if (wrap(&stk, OPOPT) == NULL) return NULL; break; case OPALT: if (cat2(&stk) == NULL) return NULL; re = parse_re(s, end); if (re == NULL) return NULL; if (mk_alt(&stk, re) == NULL) return NULL; break; case LITERAL: if (Push(&stk, tok_val(next_token)) == NULL) return NULL; if (Size(stk) > 2) { temp = stk->next; stk->next = cat2(&temp); /* condense CAT nodes */ if (stk->next == NULL) return NULL; else stk->size = stk->next->size + 1; } break; default: printf("parse_re: unknown token type %d\n", tok_type(next_token)); break; } } }
Tok_node get_token(char **s) { Tok_node rn = NULL; if (s == NULL || *s == NULL) return NULL; /* error */ rn = (Tok_node) new_node(rn); if (**s == NUL) tok_type(rn) = EOS; /* end of string */ else { switch (**s) { case '.': /* wildcard */ tok_type(rn) = LITERAL; tok_val(rn) = parse_wildcard(); if (tok_val(rn) == NULL) return NULL; break; case '[': /* character set literal */ (*s)++; tok_type(rn) = LITERAL; tok_val(rn) = parse_cset(s); if (tok_val(rn) == NULL) return NULL; break; case '(': tok_type(rn) = LPAREN; break; case ')' : tok_type(rn) = RPAREN; break; case '*' : tok_type(rn) = OPSTAR; break; case '|' : tok_type(rn) = OPALT; break; case '?' : tok_type(rn) = OPOPT; break; case '\\': /* escaped character */ (*s)++; default : /* must be ordinary character */ tok_type(rn) = LITERAL; tok_val(rn) = parse_chlit(**s); if (tok_val(rn) == NULL) return NULL; break; } /* switch (**s) */ (*s)++; } /* else */ return rn; }
/*
 * Like tok_pop(), but leading T_SPACE tokens are skipped first.  The
 * current token is appended to tok_prev, and further tokens keep being
 * appended until the token is empty or tok_chops(!sep) reports a stop.
 * Returns tok_prev, or NULL if nothing was collected.
 *
 * NOTE(review): the append into tok_prev is not bounds-checked here;
 * confirm that tok_prev is large enough for the longest collected text.
 */
char *tok_poptext(int sep)
{
	/* discard leading space tokens */
	for (; tok_type() == T_SPACE; tok_read())
		;

	tok_prev[0] = '\0';
	for (;;) {
		strcat(tok_prev, tok);
		tok_read();
		if (!tok[0])
			break;		/* no more input */
		if (tok_chops(!sep))
			break;		/* the new token ends the text */
	}

	if (!tok_prev[0])
		return NULL;
	return tok_prev;
}
/*
 * If the next token is s, skip it and return zero; otherwise return one.
 *
 * Space tokens are skipped first.  A one-character s taken from
 * "{}~^\t" matches on text alone; any other s matches only when the
 * current token is of type T_KEYWORD.
 */
int tok_jmp(char *s)
{
	tok_blanks();

	/* single-character special tokens: compare the text only */
	if (tok_get() && s[1] == '\0' && strchr("{}~^\t", s[0]) != NULL &&
	    strcmp(s, tok_get()) == 0) {
		tok_pop();
		return 0;
	}

	/* everything else has to be a keyword with the same text */
	if (tok_type() == T_KEYWORD && tok_get() && strcmp(s, tok_get()) == 0) {
		tok_pop();
		return 0;
	}

	return 1;
}
/* pop tokens for as long as the current one is of type T_SPACE */
static void tok_blanks(void)
{
	for (;;) {
		if (tok_type() != T_SPACE)
			break;
		tok_pop();
	}
}