/*
 * Reads the regular-expression specification in inFile, builds one NFA per
 * expression, combines them and writes the combined NFA to outFile.
 *
 * Input, as consumed here: the first line is read and discarded; every
 * following line is split on whitespace into a token class name and a
 * regular expression (any further tokens on the line are ignored).
 * Blank lines are skipped.
 *
 * Output, as produced here:
 *   line 1 : "<tokenClassName> <element 2>," for every row of the combined
 *            NFA whose element 0 is -1, in row order
 *   line 2 : the current value of uniqueStateID
 *   rest   : one line per remaining non-empty row, elements separated by tabs
 *
 * The process is terminated with exit(1) when either file cannot be opened,
 * when a line names a token class but gives no expression, or when the
 * combined NFA holds more -1 rows than there are token class names.
 *
 * NOTE: the function always returns false (the original returned 0); this
 * is kept so that existing callers observe the same value.
 */
bool fileReadWrite ( string inFile, string outFile )
{
    ifstream fpCal;
    ofstream fpOut;
    // c_str() is handed to open() directly; a strdup() copy would never be freed
    fpCal.open( inFile.c_str() );
    fpOut.open( outFile.c_str() );
    if ( !fpCal.is_open() || !fpOut.is_open() ){
        perror("Regular expression or output file doesn't exist or could not be opened\n");
        exit (1);
    }

    // The first line of the input is not used by this function
    string discardAlphabetString;
    getline ( fpCal, discardAlphabetString );

    vector < NFAStateSet > allRegexNFA;
    vector <string> tokenClassName;
    string infixExp;
    while ( getline ( fpCal, infixExp ) ){
        stringstream ss(infixExp);
        string className;
        string regEx;
        if ( !( ss >> className ) )
            continue;   // blank line: nothing to build
        if ( !( ss >> regEx ) ){
            // An empty expression would leave the NFA stack empty downstream
            fprintf ( stderr, "ERROR: No regular expression given for token class %s\n", className.c_str() );
            exit (1);
        }
        tokenClassName.push_back( className );
        string escapedSeq = convertStringToEscapedString ( regEx );
        allRegexNFA.push_back( generateOpStack ( infixToPostfix( implicitConcat ( escapedSeq ) ), inFile ) );
    }

    /*
     * Now allRegexNFA contains all NFAs for each expression as a vector
     * of vectors. Now call function to join these together, single start
     * state goes to each start state of these NFAs, and then each end
     * is stored separately with TOKEN_IDs (contained in tokenClassName)
     */
    NFAStateSet resultCombined = combineAllNFA ( allRegexNFA );

    // Header line: pair each -1 row with the next token class name
    vector <string>::size_type tokenPtr = 0;
    for ( unsigned int i=0; i < resultCombined.size(); i++ ){
        if ( resultCombined[i].size() > 2 && resultCombined[i][0] == -1 ){
            if ( tokenPtr >= tokenClassName.size() ){
                fprintf ( stderr, "ERROR: Combined NFA has more final-state rows than token classes\n" );
                exit (1);
            }
            fpOut<<tokenClassName[tokenPtr++]<<" "<<resultCombined[i][2]<<",";
        }
    }
    fpOut<<"\n";
    fpOut<<uniqueStateID<<"\n";

    // Body: every non-empty row that is not a -1 row, tab separated
    for ( unsigned int i=0; i < resultCombined.size(); i++ ){
        if ( resultCombined[i].empty() || resultCombined[i][0] == -1 )
            continue;
        for ( unsigned int j=0; j < resultCombined[i].size(); j++ )
            fpOut<<resultCombined[i][j]<<"\t";
        fpOut<<"\n";
    }

    fpCal.close();
    fpOut.close();
    return false;
}
/*
 * Returns the NFA for a* built from the NFA a.
 *
 * Rows of a are copied up to (not including) the first row whose element 0
 * is -1. The start and finish states of a are taken from its last row when
 * that row begins with -1, otherwise from row 0. Two fresh states are
 * allocated from uniqueStateID and four epsilon rows are appended:
 *   new start  -> old start      (enter a)
 *   new start  -> new final      (zero repetitions)
 *   old finish -> old start      (repeat a)
 *   old finish -> new final      (leave a)
 * followed by a { -1, new start, new final } row.
 */
NFAStateSet operationSTAR ( NFAStateSet a )
{
    NFAStateSet result;

    // Carry over the rows of a that precede its first -1 row
    unsigned int row = 0;
    while ( row < a.size() && a[row][0] != -1 ){
        result.push_back( a[row] );
        row++;
    }

    // Locate the start/finish pair of a
    const bool hasMarker = ( a[a.size()-1][0] == -1 );
    const int innerStart  = hasMarker ? a[a.size()-1][1] : a[0][1];
    const int innerFinish = hasMarker ? a[a.size()-1][2] : a[0][2];

    // Two new states: start first, then final
    const int outerStart = uniqueStateID++;
    const int outerFinal = uniqueStateID++;

    const int eps = operators[EPSILON];
    const int enterInner[3]  = { eps, outerStart,  innerStart };
    const int skipInner[3]   = { eps, outerStart,  outerFinal };
    const int repeatInner[3] = { eps, innerFinish, innerStart };
    const int leaveInner[3]  = { eps, innerFinish, outerFinal };
    const int marker[3]      = { -1,  outerStart,  outerFinal };

    result.push_back( vector<int>( enterInner,  enterInner  + 3 ) );
    result.push_back( vector<int>( skipInner,   skipInner   + 3 ) );
    result.push_back( vector<int>( repeatInner, repeatInner + 3 ) );
    result.push_back( vector<int>( leaveInner,  leaveInner  + 3 ) );
    result.push_back( vector<int>( marker,      marker      + 3 ) );

    return result;
}
/*
 * Returns the NFA for a@b (a followed by b).
 *
 * Rows of a, then rows of b, are copied up to (not including) each one's
 * first row whose element 0 is -1. For each operand the start and finish
 * states come from its last row when that row begins with -1, otherwise
 * from row 0. Two fresh states are allocated from uniqueStateID and three
 * epsilon rows are appended:
 *   new start   -> start of a
 *   finish of a -> start of b
 *   finish of b -> new final
 * followed by a { -1, new start, new final } row.
 */
NFAStateSet operationCONCAT ( NFAStateSet a, NFAStateSet b )
{
    NFAStateSet result;

    // Carry over the rows that precede the first -1 row of each operand
    unsigned int row = 0;
    while ( row < a.size() && a[row][0] != -1 ){
        result.push_back( a[row] );
        row++;
    }
    row = 0;
    while ( row < b.size() && b[row][0] != -1 ){
        result.push_back( b[row] );
        row++;
    }

    // Start/finish pair of the left operand
    const bool leftHasMarker = ( a[a.size()-1][0] == -1 );
    const int leftStart  = leftHasMarker ? a[a.size()-1][1] : a[0][1];
    const int leftFinish = leftHasMarker ? a[a.size()-1][2] : a[0][2];

    // Start/finish pair of the right operand
    const bool rightHasMarker = ( b[b.size()-1][0] == -1 );
    const int rightStart  = rightHasMarker ? b[b.size()-1][1] : b[0][1];
    const int rightFinish = rightHasMarker ? b[b.size()-1][2] : b[0][2];

    // Two new states: start first, then final
    const int outerStart = uniqueStateID++;
    const int outerFinal = uniqueStateID++;

    const int eps = operators[EPSILON];
    const int enterLeft[3]  = { eps, outerStart,  leftStart  };
    const int bridge[3]     = { eps, leftFinish,  rightStart };
    const int leaveRight[3] = { eps, rightFinish, outerFinal };
    const int marker[3]     = { -1,  outerStart,  outerFinal };

    result.push_back( vector<int>( enterLeft,  enterLeft  + 3 ) );
    result.push_back( vector<int>( bridge,     bridge     + 3 ) );
    result.push_back( vector<int>( leaveRight, leaveRight + 3 ) );
    result.push_back( vector<int>( marker,     marker     + 3 ) );

    return result;
}
/*
 * Returns element 2 of the last row of states whose element 0 is -1.
 *
 * Rows with fewer than three elements are ignored. When no row qualifies
 * the function returns -1; previously the result was an uninitialized
 * value in that case.
 */
int getFinalState ( NFAStateSet states )
{
    int finalState = -1;
    for ( unsigned int i=0; i<states.size(); i++ )
        if ( states[i].size() > 2 && states[i][0] == -1 )
            finalState = states[i][2];   // later rows override earlier ones
    return finalState;
}
static void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex, const PostDomTree &pdom_tree, const NFAStateSet &init, map<NFAVertex, NFAStateSet> *squash, som_type som, const vector<DepthMinMax> &som_depths, const ue2::unordered_map<NFAVertex, u32> ®ion_map, smgb_cache &cache) { deque<NFAVertex> remaining; for (const auto &m : *squash) { remaining.push_back(m.first); } while (!remaining.empty()) { NFAVertex v = remaining.back(); remaining.pop_back(); for (auto u : inv_adjacent_vertices_range(v, g)) { if (is_special(u, g)) { continue; } if (g[v].char_reach != g[u].char_reach) { continue; } if (out_degree(u, g) != 1) { continue; } NFAStateSet u_squash(init.size()); u32 u_index = g[u].index; buildSquashMask(u_squash, g, u, g[u].char_reach, init, vByIndex, pdom_tree, som, som_depths, region_map, cache); u_squash.set(u_index); /* never clear ourselves */ if ((~u_squash).any()) { // i.e. some bits unset in mask DEBUG_PRINTF("%u is an upstream squasher of %u\n", u_index, g[v].index); (*squash)[u] = u_squash; remaining.push_back(u); } } } }
/*
 * Pops and returns the NFA on top of pending. Prints an error and
 * terminates with exit(1) when pending is empty, i.e. when an operator in
 * the postfix expression has no operand left to consume.
 */
static NFAStateSet popOperandOrDie ( stack < NFAStateSet > &pending )
{
    if ( pending.empty() ){
        cout<<"ERROR: Malformed postfix expression, operator is missing an operand\n";
        exit(1);
    }
    NFAStateSet top = pending.top();
    pending.pop();
    return top;
}

/*
 * Evaluates the postfix regular expression postfix on a stack of NFAs and
 * returns the single NFA that remains.
 *
 * For every character c of postfix:
 *   - if isOperator(c) is -1, the first row of allTermStates(fileName)
 *     whose element 0 equals c is copied, its elements 1 and 2 are replaced
 *     by two fresh IDs taken from uniqueStateID, and the one-row NFA is
 *     pushed. A character with no matching row pushes nothing.
 *   - OR and CONCAT pop two NFAs (right operand first) and push the result
 *     of operationOR / operationCONCAT.
 *   - STAR pops one NFA and pushes the result of operationSTAR.
 *
 * When no operator was applied, the result is a bare one-row NFA, so a
 * { -1, start, finish } row built from elements 1 and 2 of row 0 is
 * appended before returning.
 *
 * Terminates with exit(1) when an operator lacks operands, when the
 * expression leaves the stack empty, or when more than one NFA remains.
 */
NFAStateSet generateOpStack ( string postfix, string fileName )
{
    stack < NFAStateSet > stateStack;
    NFAStateSet inputState = allTermStates( fileName );
    bool isSingleton = true;

    for ( unsigned int i=0; i<postfix.length(); i++ ){
        // Classify the character once instead of once per comparison
        const int op = isOperator( postfix[i] );

        if ( op == -1 ){
            //use the nfa for this state
            for ( unsigned int j = 0; j<inputState.size(); j++ ){
                // Rows shorter than three elements cannot carry state IDs
                if ( inputState[j].size() > 2 &&
                     inputState[j][0] == static_cast<int>( postfix[i] ) ){
                    /*
                     * Copy this to uidGiven to assign unique IDs
                     */
                    vector <int> uidGiven = inputState[j];
                    uidGiven[1]=uniqueStateID;
                    uniqueStateID++;
                    uidGiven[2]=uniqueStateID;
                    uniqueStateID++;

                    NFAStateSet tempToBePushed;
                    tempToBePushed.push_back(uidGiven);
                    stateStack.push ( tempToBePushed );
                    break;
                }
            }
        }
        else if ( op == OR ){
            //send the two preceding states to the function to OR
            NFAStateSet operand2 = popOperandOrDie ( stateStack );
            NFAStateSet operand1 = popOperandOrDie ( stateStack );
            stateStack.push( operationOR ( operand1, operand2 ) );
            isSingleton = false;
        }
        else if ( op == CONCAT ){
            //send the two preceding states to the function to CONCAT
            NFAStateSet operand2 = popOperandOrDie ( stateStack );
            NFAStateSet operand1 = popOperandOrDie ( stateStack );
            stateStack.push( operationCONCAT ( operand1, operand2 ) );
            isSingleton = false;
        }
        else if ( op == STAR ){
            //send the preceding state to the function to STAR
            NFAStateSet operand = popOperandOrDie ( stateStack );
            stateStack.push( operationSTAR ( operand ) );
            isSingleton = false;
        }
    }

    // top() on an empty stack is undefined, so check before reading it
    if ( stateStack.empty() ){
        cout<<"ERROR: Expression produced no NFA\n";
        exit(1);
    }

    NFAStateSet finalStateSet = stateStack.top();
    stateStack.pop();

    if ( !stateStack.empty() ){
        if ( isSingleton )
            cout<<"ERROR: Stack didn't empty itself\n";
        else
            cout<<"ERROR: NFA stack didn't empty itself\n";
        exit(1);
    }

    if ( isSingleton ){
        // A bare operand has no start/end row yet; add one
        vector <int> startEndInfo;
        startEndInfo.push_back(-1);
        startEndInfo.push_back(finalStateSet[0][1]);
        startEndInfo.push_back(finalStateSet[0][2]);
        finalStateSet.push_back( startEndInfo );
    }

    return finalStateSet;
}