//Returns the final state in with maximum prefix match //i.e which has maximum forward pointer. STATE_AND_FORWARD simulateNFA(char input[], STATE STARTSTATE, int forward) { char currentChar = input[forward]; if(isFinalState(STARTSTATE))//Final state has no output transition { STATE_AND_FORWARD t = {STARTSTATE, forward-1}; return t; } /*flag checks if there is any transition available for this state, otherwise returns -1*/ int i, flag = 0; STATE_AND_FORWARD max = {-1, -1}, temp = {-1, -1}; for(i=1;i<nextAvailableState;++i) { if(STT[STARTSTATE][i][currentChar])//Move forward ahead { flag = 1; temp = simulateNFA(input, i, forward+1); } else if(STT[STARTSTATE][i]['$'])//EPSILON transition.Dont move forward ahead { flag = 1; temp = simulateNFA(input, i, forward); } if(temp.forward>max.forward) max = temp; } if(flag == 0)//No transition found { STATE_AND_FORWARD t = {-1, -1}; return t; } return max; }
int main() { int i, LB; FILE * fp1,* fp2; char PATTERN[100]; char REGEX[100]; char * POSTFIX; fp1 = fopen("regexp.in", "r"); if(fp1==NULL) { printf("Could not open regexp.in!!"); exit(0); } while(!feof(fp1)) { fscanf(fp1, "%s %s",PATTERN, REGEX); if(strlen(REGEX)==1 || (strlen(REGEX)==2 && REGEX[1]=='*')) { buildIndividualNFA(PATTERN, REGEX); continue; } else if(strlen(REGEX)==2 && REGEX[1]!='*') { REGEX[2]='&'; REGEX[3]='\0'; buildIndividualNFA(PATTERN, REGEX); continue; } POSTFIX = conversion(REGEX); buildIndividualNFA(PATTERN, POSTFIX); } fclose(fp1); buildCombinedNFA(); char input[1000]; char tokenread[100]; initialize_Symbol_Table(); remove("a4_2.out"); while(gets(input)) { int lexemeBegin = 0; while(input[lexemeBegin]!='\0'&&input[lexemeBegin]!='\n' &&input[lexemeBegin]!='\r') //while end of line is not found { while(input[lexemeBegin]==' ' || input[lexemeBegin]=='\t') lexemeBegin++; lexemeBegin = comment(input, lexemeBegin); if(input[lexemeBegin] == '\0' || input[lexemeBegin]=='\n'||input[lexemeBegin]=='\r') break; STATE_AND_FORWARD final = simulateNFA(input, HEAD, lexemeBegin); i = final.forward - lexemeBegin+1; memcpy(tokenread, input+lexemeBegin, i); tokenread[i] = '\0'; token t;//Token attrType a;//attribute field a.string = (char *)malloc((i+1)*sizeof(char)); int tokenID = getTokenID(final.state); if(tokenID == IDNTIFIER) { strcpy(a.string, tokenread); int flag = 0; int j; for(j=0;j<24;j++) { if(strcmp(a.string,keywordArr[j].keyword)==0) //checking if string detected is a keyword { t.tokenID = keywordArr[j].tokenID; t.attribute.string = '\0'; flag = 1; break; } } if(flag==0) //not a keyword; is an identifier { t.tokenID = IDNTIFIER; fp1 = fopen("symbol_table_2.out","r"); int nflag = 0; char identifier[100]; int marker; while(!feof(fp1)) { fscanf(fp1,"%s %d",identifier,&marker); if(strcmp(identifier,a.string)==0) //if identifer is already present in symbol table { nflag = 1; break; } } fclose(fp1); if(nflag==0) //identifier not present in symbol table { 
fp1 = fopen("symbol_table_2.out","a"); fprintf(fp1,"%s 1\n",a.string); fclose(fp1); } t.attribute = a; } } else if(tokenID == INT_CONST) { a.integer = atoi(tokenread); t.tokenID = INT_CONST; t.attribute = a; } else if(tokenID == FLO_CONST) { a.integer = atof(tokenread); t.tokenID = FLO_CONST; t.attribute = a; } else if(tokenID!=NOTOK) { t.tokenID = tokenID; t.attribute.string = '\0'; } if(tokenID != NOTOK) write_token(t); else { lexemeBegin++; continue; } lexemeBegin = final.forward+1; } } }
// Forwards strToExtract to simulateNFA and returns its result as an int.
// NOTE(review): presumably the value identifies the first match found in
// strToExtract - confirm against simulateNFA's contract.
int NFA::FindFirstMatchedString(string strToExtract)
{
    const int matchResult = simulateNFA(strToExtract);
    return matchResult;
}