示例#1
0
/*
 * parse_re - parse a regular expression from the token stream at *s.
 *
 * s:   address of the cursor into the pattern; get_token() and
 *      retract_token() receive it and move it as tokens are read.
 * end: token type that must terminate this (sub)expression. The recursive
 *      call for a parenthesised group passes RPAREN.
 *
 * Returns Top() of the stack after cat2() has folded it, or NULL when the
 * input is NULL, get_token() yields NULL, the terminator does not match
 * `end`, or any helper (Push, wrap, cat2, mk_alt) reports failure.
 */
Re_node parse_re(char **s, short end)
{
    Stack stk = NULL, temp;
    Tok_node next_token;
    Re_node re = NULL;

    if (s == NULL || *s == NULL) return NULL;
    while (TRUE) {
        next_token = get_token(s);
        if (next_token == NULL) return NULL;
        switch (tok_type(next_token)) {
        case RPAREN:
            /* presumably un-reads the ')' so the LPAREN case of the
               enclosing call can consume it -- confirm in retract_token */
            retract_token(s);
            /* FALLTHROUGH: ')' and end-of-string both terminate this level */
        case EOS:
            if (end == tok_type(next_token)) return Top(cat2(&stk));
            else return NULL;
        case LPAREN:
            re = parse_re(s, RPAREN);
            /* reject a failed group before it is pushed on the stack */
            if (re == NULL) return NULL;
            if (Push(&stk, re) == NULL) return NULL;
            /* get_token() can return NULL (see the check at the top of the
               loop), so test the token before asking for its type */
            next_token = get_token(s);
            if (next_token == NULL || tok_type(next_token) != RPAREN) return NULL;
            if (Size(stk) > 2) {
                temp = stk->next;
                stk->next = cat2(&temp);	/* condense CAT nodes */
                if (stk->next == NULL) return NULL;
                else stk->size = stk->next->size + 1;
            }
            break;
        case OPSTAR:
            if (wrap(&stk, OPSTAR) == NULL) return NULL;
            break;
        case OPOPT:
            if (wrap(&stk, OPOPT) == NULL) return NULL;
            break;
        case OPALT:
            /* fold what has been parsed so far, then parse the right-hand
               alternative up to the same terminator */
            if (cat2(&stk) == NULL) return NULL;
            re = parse_re(s, end);
            if (re == NULL) return NULL;
            if (mk_alt(&stk, re) == NULL) return NULL;
            break;
        case LITERAL:
            if (Push(&stk, tok_val(next_token)) == NULL) return NULL;
            if (Size(stk) > 2) {
                temp = stk->next;
                stk->next = cat2(&temp);    /* condense CAT nodes */
                if (stk->next == NULL) return NULL;
                else stk->size = stk->next->size + 1;
            }
            break;
        default:
            /* unknown tokens are reported and skipped; parsing continues */
            printf("parse_re: unknown token type %d\n", tok_type(next_token));
            break;
        }
    }
}
示例#2
0
/*
 * parse - parse the pattern s and append an end-of-string leaf to it.
 *
 * The tree returned by parse_re() and a leaf built with
 * mk_leaf(EOS, C_LIT, NUL, NULL) are pushed on a fresh stack, which cat2()
 * then folds. pos_cnt is decremented and its new value stored in final_pos
 * just before the fold.
 *
 * Returns Top() of the folded stack, or NULL if parsing, leaf creation or
 * either Push() fails.
 */
Re_node parse(char *s)
{
    Stack nodes = NULL;
    Re_node body;
    Re_node eos_leaf;

    body = parse_re(&s, NUL);
    if (body == NULL)
        return NULL;
    if (Push(&nodes, body) == NULL)
        return NULL;

    eos_leaf = mk_leaf(EOS, C_LIT, NUL, NULL);
    if (eos_leaf == NULL)
        return NULL;
    if (Push(&nodes, eos_leaf) == NULL)
        return NULL;

    final_pos = --pos_cnt;
    return Top(cat2(&nodes));
}
示例#3
0
文件: parser.c 项目: bunnywj/GProject
/**
 * Parses one regular expression and attaches the resulting automaton to nfa.
 *
 * @param nfa  automaton whose epsilon list supplies the two entry states.
 *             The first entry is used for non-anchored expressions and the
 *             second for anchored ones.
 *             NOTE(review): assumes the caller added them in that order
 *             and that both are present -- confirm against the callers.
 * @param re   NUL-terminated regular expression; a leading TILDE marks it
 *             as anchored and is skipped before parsing.
 * @return always NULL; the result is delivered through nfa.
 */
void *regex_parser::parse_re(NFA* nfa, const char *re){
	int ptr=0;
	bool tilde_re=false;
	NFA *non_anchored = *(nfa->get_epsilon()->begin());
	NFA *anchored = *(++nfa->get_epsilon()->begin());

	//check whether the text must match at the beginning of the regular expression
	if (re[ptr]==TILDE){
		tilde_re=true;
		ptr++;
	}
	NFA *fa=parse_re(re,&ptr,false);
	fa->get_last()->accept();
	if (!tilde_re){
		non_anchored->link(fa->get_first());
	}else{
		anchored->link(fa->get_first());
	}
	// The function is declared to return void*; flowing off the end of a
	// non-void function is undefined behaviour, so return explicitly.
	return NULL;
}
示例#4
0
文件: parser.c 项目: bunnywj/GProject
/**
 * Reads regular expressions from file, one per line, and builds a single NFA
 * from those whose index lies in the requested range.
 *
 * Lines end at '\n', '\r' or end of file. Empty lines are skipped. Lines
 * whose first character is '#' are ignored and not counted. The remaining
 * lines are numbered from 1.
 *
 * @param file  stream to read; it is rewound first.
 * @param from  index of the first expression to parse.
 * @param to    index of the last expression to parse, or -1 for no upper
 *              bound.
 * @return get_first() of the NFA that was built.
 */
NFA *regex_parser::parse(FILE *file, int from, int to){
	const int MAX_RE_LEN=4000; // capacity of the line buffer, terminator included
	rewind(file);
	char *re=allocate_char_array(MAX_RE_LEN);
	int i=0; // number of characters buffered for the current line
	int j=0; // index of the current non-comment expression

	// NFA
	NFA *nfa=new NFA();
	NFA *non_anchored = nfa->add_epsilon(); // for .* RegEx
	NFA *anchored = nfa->add_epsilon(); // for anchored RegEx (^)

	fprintf(stdout,"\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");
	//parsing the RegEx and putting them in a NFA
	for(;;){
		// fgetc returns int so that EOF stays distinct from every character
		const int c=fgetc(file);
		if (c==EOF || c=='\n' || c=='\r'){
			// end of file terminates a pending line exactly like a newline does
			if(i!=0){
				re[i]='\0';
				if (re[0]!='#'){
					j++;
					if (j>=from && (to==-1 || j<=to)){
						fprintf(stdout,"%dth regex: %s\n",j,re);
						if (DEBUG) fprintf(stdout,"\n%d) processing regex:: <%s> ...\n",j,re);
						parse_re(nfa, re);
					}
				}
				i=0; // the buffer is reused for the next line
			}
			if (c==EOF) break;
		}else if (i<MAX_RE_LEN-1){
			re[i++]=static_cast<char>(c);
		}else{
			// one slot is reserved for the terminator; refuse to overrun the buffer
			fatal("regex_parser:: parse: regular expression is longer than the line buffer.");
		}
	} //end for

	// released on every path, including files that end with a newline
	free(re);
	if (DEBUG) fprintf(stdout, "\nAll RegEx processed\n");

	//handle -m modifier
	if (m_modifier && (!anchored->get_epsilon()->empty() || !anchored->get_transitions()->empty())){
		non_anchored->add_transition('\n',anchored);
		non_anchored->add_transition('\r',anchored);
	}

	//delete non_anchored, if necessary
	if(non_anchored->get_epsilon()->empty() && non_anchored->get_transitions()->empty()){
		nfa->get_epsilon()->remove(non_anchored);
		delete non_anchored;
	}else{
		non_anchored->add_any(non_anchored);
	}

	return nfa->get_first();

}
示例#5
0
文件: parser.c 项目: bunnywj/GProject
/**
 * Recursive worker that turns the text of re, starting at *ptr, into an NFA.
 *
 * A single character, ANY, escape or (via process_range) character class is
 * added directly to the automaton when no repetition follows it. Otherwise
 * it is built as a separate fragment (to_link) that the repetition branch
 * wraps with make_rep() and then links in.
 *
 * @param re       NUL-terminated regular expression.
 * @param ptr      in/out position inside re; advanced as input is consumed.
 * @param bracket  true when the call parses the inside of a round-bracket
 *                 group, i.e. a CLOSE_RBRACKET ends this call.
 * @return the automaton built at this level. At the end of the string this
 *         is fa->get_first(). At a closing bracket it is fa, or
 *         fa->make_rep(...) when a repetition follows the bracket.
 */
NFA *regex_parser::parse_re(const char *re, int *ptr, bool bracket){
	NFA *fa=new NFA();
	NFA *to_link=NULL;   // fragment waiting for the repetition that follows it
	bool open_b=bracket;
	bool close_b=false;  // set once the closing bracket of this group was consumed
	// the string does not change during parsing, so measure it once
	const int len=static_cast<int>(strlen(re));
	while((*ptr)<len){
		if(re[(*ptr)]==ESCAPE){
			int_set *chars=new int_set(CSIZE);
			(*ptr)=process_escape(re, (*ptr)+1,chars);
			// >= rather than == so a position past the end is never dereferenced
			if((*ptr)>=len||!is_repetition(re[(*ptr)])){
				fa=fa->add_transition(chars);
			}else{
				to_link=new NFA();
				to_link=to_link->add_transition(chars);
			}
			delete chars;
		}else if (!is_special(re[(*ptr)]) && ((*ptr)==(len-1)||!is_repetition(re[(*ptr)+1]))){
			fa=fa->add_transition(re[(*ptr)++]);
		}else if(!is_special(re[(*ptr)])){
			to_link=new NFA();
			to_link=to_link->add_transition(re[(*ptr)++]);
		}else if (re[(*ptr)]==ANY && ((*ptr)==(len-1)||!is_repetition(re[(*ptr)+1]))){
			fa=fa->add_any();
			(*ptr)++;
		}else if(re[(*ptr)]==ANY){
			to_link=new NFA();
			to_link=to_link->add_any();
			(*ptr)++;
		}else if (re[(*ptr)]==STAR){
			(*ptr)++;
			if (close_b)
				return fa->make_rep(0,_INFINITY);
			else if (to_link==NULL)
				// e.g. the expression starts with the quantifier
				fatal("regex_parser:: parse_re: * without a preceding expression.");
			else{
				to_link=to_link->make_rep(0,_INFINITY);
				fa=fa->link(to_link);
			}
		}else if (re[(*ptr)]==OPT){
			(*ptr)++;
			if (close_b)
				return fa->make_rep(0,1);
			else if (to_link==NULL)
				fatal("regex_parser:: parse_re: ? without a preceding expression.");
			else{
				to_link=to_link->make_rep(0,1);
				fa=fa->link(to_link);
			}
		}else if (re[(*ptr)]==PLUS){
			(*ptr)++;
			if (close_b){
				return fa->make_rep(1,_INFINITY);
			}else if (to_link==NULL){
				fatal("regex_parser:: parse_re: + without a preceding expression.");
			}else{
				to_link=to_link->make_rep(1,_INFINITY);
				fa=fa->link(to_link);
			}
		}else if(re[(*ptr)]==OPEN_QBRACKET){
			if ((*ptr)==(len-1))
				fatal("regex_parser:: parse_re: { in last position.");
			else{
				int lb=0; int ub=_INFINITY;
				(*ptr)=process_quantifier(re,(*ptr)+1,&lb,&ub);
				if (close_b)
					return fa->make_rep(lb,ub);
				else if (to_link==NULL)
					fatal("regex_parser:: parse_re: { without a preceding expression.");
				else{
					to_link=to_link->make_rep(lb,ub);
					fa=fa->link(to_link);
				}
			}
		}else if(re[(*ptr)]==OPEN_SBRACKET){
			if ((*ptr)==(len-1))
				fatal("regex_parser:: parse_re: [ in last position.");
			else
				(*ptr)=process_range(&fa,&to_link,re,(*ptr)+1);
		}else if(re[(*ptr)]==OR){
			(*ptr)++;
			fa=fa->make_or(parse_re(re,ptr,false));
		}else if(re[(*ptr)]==OPEN_RBRACKET){
			(*ptr)++;
			fa=fa->get_last()->link(parse_re(re,ptr,true));
		}else if(re[(*ptr)]==CLOSE_RBRACKET){
			if (open_b){
				close_b=true;
				(*ptr)++;
				// a repetition right after the bracket is handled by the next
				// iteration, which applies it to the whole group
				if ((*ptr)>=len || !is_repetition(re[(*ptr)]))
					return fa;
			}
			// a bracket not opened at this level is not an error here: the
			// automaton built so far is returned and *ptr stays on the bracket
			else{
				return fa;
			}
		}else{
			// No branch consumes this special character, so *ptr would never
			// advance and the loop would not terminate.
			fatal("regex_parser:: parse_re: unexpected special character.");
			(*ptr)++; // NOTE(review): only reached if fatal() returns -- confirm
		}
	}
	return fa->get_first();
}
示例#6
0
文件: parser.c 项目: bunnywj/GProject
/**
 * Builds one NFA from a selected subset of the regular expressions in file.
 *
 * Lines end at '\n' or '\r'. Empty lines are skipped. Lines whose first
 * character is '#' are ignored and not counted. The remaining lines are
 * numbered from 1, and a line is parsed when its number appears in group.
 *
 * @param file   stream to read; it is rewound first.
 * @param group  group[0] holds the number of entries, group[1..group[0]]
 *               the indices of the expressions to include.
 * @return get_first() of the NFA that was built.
 */
NFA *regex_parser::group_regex(FILE *file, int group[]){
	const int MAX_RE_LEN=1000; // capacity of the line buffer, terminator included
	rewind(file);
	char *re=allocate_char_array(MAX_RE_LEN);
	int i=0, j=0, k=0;
	int size=group[0];
	// fgetc returns int so that EOF stays distinct from every character
	int c=fgetc(file);

	// NFA
	NFA *nfa=new NFA();
	NFA *non_anchored = nfa->add_epsilon(); // for .* RegEx
	NFA *anchored = nfa->add_epsilon(); // for anchored RegEx (^)

	// parsing the RegEx and putting them in a NFA
	// smallest and largest requested index; group[1] is only read when the
	// group is non-empty, otherwise both stay 0 and nothing is selected
	int min_j = 0;
	int max_j = 0;
	if (size>=1){
		min_j = group[1];
		max_j = group[1];
	}
	for (k=2; k<=size; k++){
		if (group[k] > max_j)
			max_j = group[k];
		if (group[k] < min_j)
			min_j = group[k];
	}
	if (DEBUG) fprintf(stdout, "@\n");
	while (c!=EOF){
		if (c=='\n' || c=='\r'){
			if (i!=0){
				re[i]='\0';
				if (re[0]!='#'){
					j++;
					// past the largest requested index: nothing left to collect
					if (j>max_j)
						break;
					for (k=1; k<=size; k++){
						if (j==group[k]){
							if (DEBUG) fprintf(stdout,"%d) preprocessing regex:: <%s> ...\n",j,re);
							//if (DEBUG) printf("%d  ", j);
							parse_re(nfa, re);
							break;
						}
						if (j<min_j)
							break;
					}
				}
				i=0; // the buffer is reused for the next line
			}
		}
		else if (i<MAX_RE_LEN-1){
			re[i++]=static_cast<char>(c);
		}
		else{
			// one slot is reserved for the terminator; refuse to overrun the buffer
			fatal("regex_parser:: group_regex: regular expression is longer than the line buffer.");
		}
		c=fgetc(file);
	} //end while

	// pending text: the last line had no terminator, or the loop stopped early
	if (i!=0){
		re[i]='\0';
		if (re[0]!= '#'){
			j++;
			if (j<=max_j)
				for (k=1; k<=size; k++){
					if (j==group[k]){
						if (DEBUG) fprintf(stdout,"\n%d) preprocessing regex:: <%s> ...\n",j,re);
						//if (DEBUG) printf("%d  ", j);
						parse_re(nfa, re);
						break;
					}
					if (j<min_j)
						break;
				}
		}
	}
	if (DEBUG) fprintf(stdout, "@\n");
	if (DEBUG) fprintf(stdout, "All RegEx processed\n");

	free(re);
	re=NULL;

	//handle -m modifier
	if (m_modifier && (!anchored->get_epsilon()->empty() || !anchored->get_transitions()->empty())){
		non_anchored->add_transition('\n',anchored);
		non_anchored->add_transition('\r',anchored);
	}

	//delete non_anchored, if necessary
	if (non_anchored->get_epsilon()->empty() && non_anchored->get_transitions()->empty()){
		nfa->get_epsilon()->remove(non_anchored);
		delete non_anchored;
	}
	else{
		non_anchored->add_any(non_anchored);
	}

	return nfa->get_first();
}