Exemple #1
0
void regex::read_tokens(FILE *file){

	unsigned i=0,j=0,k=0;
	unsigned num_tokens=0;
	num_short_tokens=0;
	num_long_tokens=0;
	
	char *token = allocate_char_array(1000);
	int r = fscanf(file,"%s\n",token);
	while(r!=EOF && r>0){
		r=fscanf(file,"%s\n",token);
		if (r!=EOF && r>0) num_tokens++;
	}
	free(token);
	
	char **tokens = allocate_string_array(num_tokens);
	rewind(file);
	while (i<num_tokens){
		char *token =(char *)malloc(1000);
		fscanf(file,"%s\n",token);
		tokens[i++]=token;
	}
		
	for (int i=0;i<num_tokens;i++){
		if (strlen(tokens[i])==1 ||( strlen(tokens[i]) <=3 && tokens[i][0]=='\\') ||
			( strlen(tokens[i]) <=4 && tokens[i][0]=='\\' && tokens[i][1]=='x'))
			num_short_tokens++;
	}
	num_long_tokens=num_tokens-num_short_tokens;
	
	short_tokens = allocate_string_array(num_short_tokens);
	long_tokens = allocate_string_array(num_long_tokens);
	for (int i=0;i<num_tokens;i++){
		if (strlen(tokens[i])==1 ||( strlen(tokens[i]) <=3 && tokens[i][0]=='\\') ||
			( strlen(tokens[i]) <=4 && tokens[i][0]=='\\' && tokens[i][1]=='x'))
			short_tokens[j++]=tokens[i];
		else
			long_tokens[k++]=tokens[i];	
	}	
	free(tokens);
}
Exemple #2
0
/*
 * Builds an NFA from the regular expressions listed in `file`, one per line.
 *
 * Lines are terminated by '\n' or '\r' (or end of file). Empty lines are
 * ignored. Lines whose first character is '#' are treated as comments: they
 * are skipped and do not take part in the numbering. The remaining lines are
 * numbered 1, 2, 3, ... in file order, and a line is handed to parse_re()
 * only when its number j satisfies from <= j and (to == -1 or j <= to).
 *
 * file: stream opened for reading; it is rewound before parsing.
 * from: number of the first regex to parse.
 * to:   number of the last regex to parse, or -1 for no upper limit.
 *
 * Returns nfa->get_first() of the NFA that was built.
 */
NFA *regex_parser::parse(FILE *file, int from, int to){
	rewind(file);

	// Line buffer. It starts at 4000 bytes and grows on demand so that an
	// over-long regex cannot write past the end of the allocation.
	int capacity=4000;
	char *re=allocate_char_array(capacity);
	int i=0; // number of characters buffered for the current line
	int j=0; // number of non-comment regex lines seen so far

	// NFA
	NFA *nfa=new NFA();
	NFA *non_anchored = nfa->add_epsilon(); // for .* RegEx
	NFA *anchored = nfa->add_epsilon(); // for anchored RegEx (^)

	fprintf(stdout,"\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");

	// Parse the RegEx and put them in the NFA. End of file is handled like a
	// line terminator so that a last line without trailing newline goes
	// through exactly the same code path as every other line.
	int c; // int, not char: must be able to hold EOF as well as every byte value
	do{
		c=fgetc(file);
		if (c=='\n' || c=='\r' || c==EOF){
			if (i!=0){
				re[i]='\0';
				i=0;
				if (re[0]!='#'){
					j++;
					if (j>=from && (to==-1 || j<=to)){
						fprintf(stdout,"%dth regex: %s\n",j,re);
						if (DEBUG) fprintf(stdout,"\n%d) processing regex:: <%s> ...\n",j,re);
						parse_re(nfa, re);
					}
				}
			}
		}else{
			// Always keep one byte free for the terminating NUL.
			if (i+1>=capacity){
				const int bigger_capacity=capacity*2;
				char *bigger=allocate_char_array(bigger_capacity);
				for (int n=0;n<i;n++) bigger[n]=re[n];
				free(re);
				re=bigger;
				capacity=bigger_capacity;
			}
			re[i++]=static_cast<char>(c);
		}
	}while(c!=EOF);

	// Released unconditionally, including when the file ends with a newline.
	free(re);

	if (DEBUG) fprintf(stdout, "\nAll RegEx processed\n");

	//handle -m modifier
	if (m_modifier && (!anchored->get_epsilon()->empty() || !anchored->get_transitions()->empty())){
		non_anchored->add_transition('\n',anchored);
		non_anchored->add_transition('\r',anchored);
	}

	//delete non_anchored, if necessary
	if(non_anchored->get_epsilon()->empty() && non_anchored->get_transitions()->empty()){
		nfa->get_epsilon()->remove(non_anchored);
		delete non_anchored;
	}else{
		non_anchored->add_any(non_anchored);
	}

	return nfa->get_first();

}
Exemple #3
0
/*
 * Builds an NFA from a selected subset of the regular expressions in `file`.
 *
 * The file is read exactly like in parse(): one regex per line, lines ending
 * at '\n', '\r' or end of file, empty lines ignored, lines starting with '#'
 * skipped without being numbered. The remaining lines are numbered 1, 2, 3,
 * ... in file order; a line is handed to parse_re() only when its number is
 * listed in `group`. Reading stops as soon as a line number exceeds the
 * largest selected number.
 *
 * file:  stream opened for reading; it is rewound before parsing.
 * group: group[0] holds the number of selected regexes, group[1..group[0]]
 *        hold their line numbers. With group[0] < 1 nothing is parsed.
 *
 * Returns nfa->get_first() of the NFA that was built.
 */
NFA *regex_parser::group_regex(FILE *file, int group[]){
	rewind(file);

	// Line buffer. It starts at 1000 bytes and grows on demand so that an
	// over-long regex cannot write past the end of the allocation.
	int capacity=1000;
	char *re=allocate_char_array(capacity);
	int i=0; // number of characters buffered for the current line
	int j=0; // number of non-comment regex lines seen so far
	int k=0;
	const int size=group[0];

	// NFA
	NFA *nfa=new NFA();
	NFA *non_anchored = nfa->add_epsilon(); // for .* RegEx
	NFA *anchored = nfa->add_epsilon(); // for anchored RegEx (^)

	// Smallest and largest selected line number. group[1] is only read when
	// the group is non-empty; for an empty group max_j stays 0 and the scan
	// below stops at the first regex line.
	int min_j=0;
	int max_j=0;
	if (size>=1){
		min_j=group[1];
		max_j=group[1];
		for (k=2; k<=size; k++){
			if (group[k] > max_j)
				max_j = group[k];
			if (group[k] < min_j)
				min_j = group[k];
		}
	}

	if (DEBUG) fprintf(stdout, "@\n");

	// Parse the selected RegEx and put them in the NFA. End of file is
	// handled like a line terminator so that a last line without trailing
	// newline is processed by the same code as every other line.
	int c; // int, not char: must be able to hold EOF as well as every byte value
	do{
		c=fgetc(file);
		if (c=='\n' || c=='\r' || c==EOF){
			if (i!=0){
				re[i]='\0';
				i=0;
				if (re[0]!='#'){
					j++;
					if (j>max_j)
						break; // past the last selected regex: nothing left to do
					if (j>=min_j){ // numbers below min_j cannot be in the group
						for (k=1; k<=size; k++){
							if (j==group[k]){
								// The message for a final, unterminated line
								// carries a leading newline; both variants
								// are kept unchanged.
								if (DEBUG){
									if (c==EOF)
										fprintf(stdout,"\n%d) preprocessing regex:: <%s> ...\n",j,re);
									else
										fprintf(stdout,"%d) preprocessing regex:: <%s> ...\n",j,re);
								}
								parse_re(nfa, re);
								break;
							}
						}
					}
				}
			}
		}
		else{
			// Always keep one byte free for the terminating NUL.
			if (i+1>=capacity){
				const int bigger_capacity=capacity*2;
				char *bigger=allocate_char_array(bigger_capacity);
				for (int n=0;n<i;n++) bigger[n]=re[n];
				free(re);
				re=bigger;
				capacity=bigger_capacity;
			}
			re[i++]=static_cast<char>(c);
		}
	}while(c!=EOF);

	free(re);

	if (DEBUG) fprintf(stdout, "@\n");
	if (DEBUG) fprintf(stdout, "All RegEx processed\n");

	//handle -m modifier
	if (m_modifier && (!anchored->get_epsilon()->empty() || !anchored->get_transitions()->empty())){
		non_anchored->add_transition('\n',anchored);
		non_anchored->add_transition('\r',anchored);
	}

	//delete non_anchored, if necessary
	if (non_anchored->get_epsilon()->empty() && non_anchored->get_transitions()->empty()){
		nfa->get_epsilon()->remove(non_anchored);
		delete non_anchored;
	}
	else{
		non_anchored->add_any(non_anchored);
	}

	return nfa->get_first();
}