void regex::read_tokens(FILE *file){ unsigned i=0,j=0,k=0; unsigned num_tokens=0; num_short_tokens=0; num_long_tokens=0; char *token = allocate_char_array(1000); int r = fscanf(file,"%s\n",token); while(r!=EOF && r>0){ r=fscanf(file,"%s\n",token); if (r!=EOF && r>0) num_tokens++; } free(token); char **tokens = allocate_string_array(num_tokens); rewind(file); while (i<num_tokens){ char *token =(char *)malloc(1000); fscanf(file,"%s\n",token); tokens[i++]=token; } for (int i=0;i<num_tokens;i++){ if (strlen(tokens[i])==1 ||( strlen(tokens[i]) <=3 && tokens[i][0]=='\\') || ( strlen(tokens[i]) <=4 && tokens[i][0]=='\\' && tokens[i][1]=='x')) num_short_tokens++; } num_long_tokens=num_tokens-num_short_tokens; short_tokens = allocate_string_array(num_short_tokens); long_tokens = allocate_string_array(num_long_tokens); for (int i=0;i<num_tokens;i++){ if (strlen(tokens[i])==1 ||( strlen(tokens[i]) <=3 && tokens[i][0]=='\\') || ( strlen(tokens[i]) <=4 && tokens[i][0]=='\\' && tokens[i][1]=='x')) short_tokens[j++]=tokens[i]; else long_tokens[k++]=tokens[i]; } free(tokens); }
/*
 * Builds an NFA from the regular expressions listed in `file`, one per line.
 *
 * Lines are separated by '\n' or '\r'; empty lines are skipped and lines whose
 * first character is '#' are treated as comments. The remaining lines are
 * numbered from 1, and only those with from <= number (and number <= to,
 * unless to == -1) are echoed to stdout and handed to parse_re().
 *
 * Returns nfa->get_first() of the newly created NFA.
 *
 * Fixes over the previous version:
 *  - the line buffer grows on demand instead of overflowing on lines of 4000
 *    characters or more;
 *  - the line buffer is always released (it used to leak when the file ended
 *    with a line terminator);
 *  - fgetc()'s result is kept in an int for the EOF comparison;
 *  - one buffer is reused for all lines and the unused `cmd` array is gone.
 */
NFA *regex_parser::parse(FILE *file, int from, int to){
    rewind(file);

    int capacity = 4000;                       // current size of `re`
    char *re = allocate_char_array(capacity);
    int len = 0;                               // characters buffered for the current line
    int index = 0;                             // number of the last non-comment line seen

    // NFA
    NFA *nfa = new NFA();
    NFA *non_anchored = nfa->add_epsilon();    // for .* RegEx
    NFA *anchored = nfa->add_epsilon();        // for anchored RegEx (^)

    fprintf(stdout,"\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");

    // parsing the RegEx and putting them in a NFA
    for (;;){
        const int c = fgetc(file);
        const bool at_eof = (c == EOF);

        if (at_eof || c == '\n' || c == '\r'){
            // End of line (EOF also terminates a final line with no newline).
            if (len != 0){
                re[len] = '\0';
                if (re[0] != '#'){
                    index++;
                    if (index >= from && (to == -1 || index <= to)){
                        fprintf(stdout,"%dth regex: %s\n",index,re);
                        if (DEBUG) fprintf(stdout,"\n%d) processing regex:: <%s> ...\n",index,re);
                        parse_re(nfa, re);
                    }
                }
                len = 0;
            }
            if (at_eof) break;
        }else{
            // Keep room for this character plus the terminating '\0'.
            if (len + 1 >= capacity){
                const int grown_capacity = capacity * 2;
                char *grown = allocate_char_array(grown_capacity);
                memcpy(grown, re, static_cast<size_t>(len));
                free(re);
                re = grown;
                capacity = grown_capacity;
            }
            re[len++] = static_cast<char>(c);
        }
    }
    free(re);

    if (DEBUG) fprintf(stdout, "\nAll RegEx processed\n");

    //handle -m modifier
    if (m_modifier && (!anchored->get_epsilon()->empty() || !anchored->get_transitions()->empty())){
        non_anchored->add_transition('\n',anchored);
        non_anchored->add_transition('\r',anchored);
    }

    //delete non_anchored, if necessary
    if (non_anchored->get_epsilon()->empty() && non_anchored->get_transitions()->empty()){
        nfa->get_epsilon()->remove(non_anchored);
        delete non_anchored;
    }else{
        non_anchored->add_any(non_anchored);
    }

    return nfa->get_first();
}
/*
 * Builds an NFA from a selected subset of the regular expressions in `file`.
 *
 * `group[0]` holds the number of selected entries and `group[1..group[0]]`
 * hold the 1-based numbers of the regular expressions to include. Lines are
 * separated by '\n' or '\r'; empty lines are skipped and lines starting with
 * '#' are comments that do not take a number. Reading stops as soon as a
 * regex number beyond the largest selected one is reached.
 *
 * Returns nfa->get_first() of the newly created NFA.
 *
 * Fixes over the previous version:
 *  - the line buffer grows on demand instead of overflowing on lines of 1000
 *    characters or more;
 *  - group[1] is no longer read when the group is empty (group[0] <= 0);
 *  - fgetc()'s result is kept in an int for the EOF comparison;
 *  - one buffer is reused for all lines.
 */
NFA *regex_parser::group_regex(FILE *file, int group[]){
    rewind(file);

    int capacity = 1000;                       // current size of `re`
    char *re = allocate_char_array(capacity);
    int len = 0;                               // characters buffered for the current line
    int index = 0;                             // number of the last non-comment line seen
    const int size = group[0];

    // NFA
    NFA *nfa = new NFA();
    NFA *non_anchored = nfa->add_epsilon();    // for .* RegEx
    NFA *anchored = nfa->add_epsilon();        // for anchored RegEx (^)

    // Range of selected regex numbers; (0,0) for an empty group, so the scan
    // below stops at the first regex without selecting anything.
    int min_j = 0;
    int max_j = 0;
    if (size > 0){
        min_j = max_j = group[1];
        for (int k = 2; k <= size; k++){
            if (group[k] > max_j) max_j = group[k];
            if (group[k] < min_j) min_j = group[k];
        }
    }

    if (DEBUG) fprintf(stdout, "@\n");

    // parsing the RegEx and putting them in a NFA
    for (;;){
        const int c = fgetc(file);
        const bool at_eof = (c == EOF);

        if (at_eof || c == '\n' || c == '\r'){
            // End of line (EOF also terminates a final line with no newline).
            if (len != 0){
                re[len] = '\0';
                if (re[0] != '#'){
                    index++;
                    if (index > max_j) break;  // nothing further can be selected
                    if (index >= min_j){
                        for (int k = 1; k <= size; k++){
                            if (index == group[k]){
                                if (DEBUG){
                                    // The unterminated last line has always been logged with a leading blank line.
                                    if (at_eof) fputc('\n', stdout);
                                    fprintf(stdout,"%d) preprocessing regex:: <%s> ...\n",index,re);
                                }
                                parse_re(nfa, re);
                                break;
                            }
                        }
                    }
                }
                len = 0;
            }
            if (at_eof) break;
        }else{
            // Keep room for this character plus the terminating '\0'.
            if (len + 1 >= capacity){
                const int grown_capacity = capacity * 2;
                char *grown = allocate_char_array(grown_capacity);
                memcpy(grown, re, static_cast<size_t>(len));
                free(re);
                re = grown;
                capacity = grown_capacity;
            }
            re[len++] = static_cast<char>(c);
        }
    }
    free(re);

    if (DEBUG) fprintf(stdout, "@\n");
    if (DEBUG) fprintf(stdout, "All RegEx processed\n");

    //handle -m modifier
    if (m_modifier && (!anchored->get_epsilon()->empty() || !anchored->get_transitions()->empty())){
        non_anchored->add_transition('\n',anchored);
        non_anchored->add_transition('\r',anchored);
    }

    //delete non_anchored, if necessary
    if (non_anchored->get_epsilon()->empty() && non_anchored->get_transitions()->empty()){
        nfa->get_epsilon()->remove(non_anchored);
        delete non_anchored;
    }else{
        non_anchored->add_any(non_anchored);
    }

    return nfa->get_first();
}