/// \brief Adds a transition for a range of surrogate symbols for the specified state static void add_surrogate_transition(const range<int>& surrogateRange, int currentState, int targetState, ndfa* nfa) { // If the range is out of the range of valid surrogate characters then clip it if (surrogateRange.lower() >= 0x110000) return; if (surrogateRange.upper() > 0x110000) { add_surrogate_transition(range<int>(surrogateRange.lower(), 0x110000), currentState, targetState, nfa); return; } // Work out the range as surrogate pairs pair<int, int> surrogateLower = surrogate_pair(surrogateRange.lower()); pair<int, int> surrogateHigher = surrogate_pair(surrogateRange.upper()-1); // Action depends on whether or not there are 1, 2 or more 'upper' characters if (surrogateLower.first == surrogateHigher.first) { // Transit to a state if we match the 'upper' code point int tmpState = nfa->add_state(); nfa->add_transition(currentState, range<int>(surrogateLower.first, surrogateLower.first+1), tmpState); // Transit to the final state if we match any of the lower symbols nfa->add_transition(tmpState, range<int>(surrogateLower.second, surrogateHigher.second+1), targetState); } else { // Transit to a new state for the lower set of symbols int tmpState1 = nfa->add_state(); nfa->add_transition(currentState, range<int>(surrogateLower.first, surrogateLower.first+1), tmpState1); // Transit to the final state for all the 'lower' symbols nfa->add_transition(tmpState1, range<int>(surrogateLower.second, 0xdc00), targetState); // ... do the same for the 'upper' set of symbols int tmpState2 = nfa->add_state(); nfa->add_transition(currentState, range<int>(surrogateHigher.first, surrogateHigher.first+1), tmpState2); // Transit to the final state for all the 'lower' symbols nfa->add_transition(tmpState2, range<int>(0xd800, surrogateHigher.second+1), targetState); // If there's a middle range, then add transitions for that as well if (surrogateHigher.first-1 > surrogateLower.first) { // Transit for all of the remaining 'higher' symbols int tmpState3 = nfa->add_state(); nfa->add_transition(currentState, range<int>(surrogateLower.first+1, surrogateHigher.first), tmpState3); // Accept for any 'lower' symbol nfa->add_transition(tmpState3, range<int>(0xdc00, 0xe000), targetState); } } }