std::string PhasedExecution::execute(int argc, char* argv[]) { if (_contextManager != nullptr) { DeLOG("Destroying Old Context For Another Run."); delete _contextManager; } if (argc < 2) { perror("Not enough arguments, correct usage: ./repss input_file"); return ""; } _contextManager = new ContextManager(); auto lexerContext = _contextManager->getContext<ContextType::AllowedTypes, ContextType::Lexer>(); std::string inputFileName{argv[1]}; runLexer(&lexerContext, inputFileName); auto grammarContext = _contextManager->getContext<ContextType::AllowedTypes, ContextType::Grammar>(); runGrammarAggregation(&grammarContext); delete _contextManager; _contextManager = nullptr; DeLOG("Finished PhasedExecution\n"); std::string generatedOutput; return generatedOutput; }
/**
 * Walks every annotated-data entry exposed by the grammar context, splits
 * each one into a (keyword name, content) pair and logs it, then hands the
 * aggregated blocks over to the context.
 *
 * @param context grammar context providing the annotated data and receiving
 *                the block aggregate.
 */
void GrammarBlockAggregator::processAnnotatedData(IGrammarContext* const context)
{
    DeLOG("Beggining processing of annotated data from Lexer\n");
    DeLOG("-------------------------------------------------\n");

    auto lexerOutput = context->getAnnotatedData();
    const size_t entryCount = lexerOutput.size();

    for (size_t entry = 0; entry < entryCount; ++entry) {
        DeLOG(std::string{"Processing annotatedData #"}.append(std::to_string(entry)).append("\n").c_str());
        auto annotatedEntry = lexerOutput.getAt(entry);

        // Parse the string for its type:
        //  - it can be either a keyword or data;
        //  - data can be recognized as just data, anything else is
        //    considered a keyword.
        // The parser currently reports "invalid" as the pair's first member
        // when the entry is malformed.
        auto nameAndContent = getKeywordNameAndContentFromAnnotatedData(annotatedEntry);
        DeLOG(std::string{nameAndContent.first}.append(", ").append(nameAndContent.second).append("\n").c_str());

        // Planned handling (not implemented yet):
        //  - data is pushed to the current block;
        //  - a keyword is analysed: is it expected? If not, ignore it.
        //    A Variable adds a new block as the newest child of the current
        //    line of blocks, and a corresponding 'closure' must then be
        //    watched for to end that block, so this has to be noted somehow.
        //  - everything else (terminals) is added to the current block, or
        //    to the line when not within a block. The system as a whole can
        //    be viewed as a block.
        if (nameAndContent.first == "data_line") {
            // There is not much to do here: just add it to whatever the
            // current active grammar block is.
        } else {
            // Keyword handling goes here.
        }

        // Put the grammar data block into the lexer/grammar proxy.
    }

    auto grammarDataProxy = context->getGrammarDataProxy();
    context->setGrammarBlockAggregate(static_cast<GrammarBlockAggregate&&>(_aggregatedBlocks));
}
void init(const lexer_configuration& config) { if (_lexerDataProxy != nullptr) { perror("LexerManager/LexerDataProxy have already been initialized. Doing nothing..."); return; } auto scanWordTransitionMap = config.getScanWordTransitionMap(); auto scanWords = config.getScanWords(); auto dfaManager = config.getDfaManager(); _lexerDataProxy = new LexerDataProxy(dfaManager, scanWordTransitionMap, scanWords); _context->initLexerDataProxy((const ILexerDataProxy*)_lexerDataProxy); DeLOG("Initialized Lexer Data Proxy Successfully.\nSuccessfully Intialized LexerManager\n"); }
//Outside this class (lexer_word_constructor right now), we maintain a list of implicitly/explicitly created // ScanWordNodes, this ensures we know not to create a ScanWordNode with an existing id explicitly // if we children are needed for an object we only create it if there doesn't already exist a reference for it - // this node created here or elsewhere need not have been init'd yet. // tothinkabout:will 'maybe' we can actually move all this init stuff into constructor make ScanWordNode const?? void ScanWordNode::init(ScanWordTransitionMap* const transitionMap, const std::unordered_set<ScanWordNode*>& existingScanWordNodes, std::vector<ScanWordNode*>& wordsToBeInitd) { if (_lexerDfa == nullptr) { DeLOG("Error - API Misuse: Either init has already been called, or nullptr was given to ScanWordNode constructor. Both bad.\n"); exit(1); } //To handle case where 2 or more transitions point to same lexer dfa we have locallCreatedScanWordNodes. // eg: 'partially ranged' input (a|b|c)->some_lexer_dfa [1] std::vector<ScanWordNode*> locallyCreatedScanWordNodes; _id = _lexerDfa->getId(); auto transitions = _lexerDfa->getTransitions(); _lexerDfa->_printTransitions(); for (auto aTransition : transitions) { const auto stateAndInput = aTransition.getStateAndInput(); const auto input = stateAndInput.getInput(); //1) check if the transition points to lexer_dfa for which a ScanWordNode already exists (has same id) const auto nextDfa = aTransition.getDfaNode(); const auto nextDfaId = nextDfa->getId(); ScanWordNode* nextScanWordNode = nullptr; //check locally created scan words first for (auto existingScanWordNode : locallyCreatedScanWordNodes) { if (existingScanWordNode->getId() == nextDfaId) { nextScanWordNode = existingScanWordNode; break; } } //if scan word node was already created locally, we can skip ahead if (nextScanWordNode == nullptr) { //if transition already exists in unordered_set param, use it for (auto existingScanWordNode : existingScanWordNodes) { if 
(existingScanWordNode->getId() == nextDfaId) { nextScanWordNode = existingScanWordNode; break; } } //if it exists it neither unordered_set param and not in locallyCreatedScanWordNodes container, we make it if (nextScanWordNode == nullptr) {//if no corresponding ScanWordNode already exists in vector param, create it, place in toBeInitd nextScanWordNode = new ScanWordNode(nextDfa); wordsToBeInitd.push_back(nextScanWordNode); //the calling function should take care of transferring elements in wordsToBeInitd to existingScanWordNodes locallyCreatedScanWordNodes.push_back(nextScanWordNode); //to handle case where two transitions point to same lexer dfa (say with different inputs) } } //2) at this point nextScanWordNode is guaranteed to be set to something (not nullptr) const auto isRangedTransition = aTransition.getIsRanged(); const auto isAnythingBut = aTransition.getIsAnythingBut(); if (isRangedTransition) { DeLOG("\tisRangedTransition=true\n"); char rangedPossibilities[] = {SI_CHARS_LOWER, SI_CHARS_UPPER, SI_CHARS_ANY, SI_NUMBERS_0, SI_NUMBERS_1to9, SI_NUMBERS_0to9, SI_EMPTY}; auto rangedInputCategory = stateAndInput.getInput(); for (auto possibility : rangedPossibilities) { bool shouldAddToTransitionMap; switch (possibility) { case SI_EMPTY: shouldAddToTransitionMap = false; //revisit this. keep false. 
maybe treat in same spirit as _anythingBut break; case SI_CHARS_LOWER: shouldAddToTransitionMap = islower(rangedInputCategory); break; case SI_CHARS_UPPER: shouldAddToTransitionMap = isupper(rangedInputCategory); break; case SI_CHARS_ANY: shouldAddToTransitionMap = isalpha(rangedInputCategory); break; case SI_NUMBERS_0: shouldAddToTransitionMap = rangedInputCategory == '0'; break; case SI_NUMBERS_1to9: shouldAddToTransitionMap = (rangedInputCategory >= '1' && rangedInputCategory <= '9'); break; case SI_NUMBERS_0to9: shouldAddToTransitionMap = (rangedInputCategory >= '1' && rangedInputCategory <= '9'); break; default: shouldAddToTransitionMap = false; break; }; if (shouldAddToTransitionMap) { if (!isAnythingBut) { TransitionInputKey transitionMapKeyRanged(getId(), possibility, true, false, true); std::pair<TransitionInputKey, ScanWordNode*> transitionMapKeyAndValue{ transitionMapKeyRanged, nextScanWordNode }; transitionMap->emplace(transitionMapKeyAndValue); addProperty(_properties, ScanWordProperties_t::SCAN_WORD_PROPERTY_HAS_RANGED_TRANSITION); } else { if (_anythingButTransition != nullptr) { DeLOG("Ooops, somehow we managed to define two 'anythingBut' transitions, this almost certainly leads to undefined behaviour. Quitting.\n"); exit(1); } TransitionInputKey transitionKeyAnythingButRange(getId(), possibility, true, true, false); std::pair<TransitionInputKey, ScanWordNode*>* transitionKeyAndValue = new std::pair<TransitionInputKey, ScanWordNode*>(transitionKeyAnythingButRange, nextScanWordNode); _anythingButTransition = transitionKeyAndValue; addProperty(_properties, ScanWordProperties_t::SCAN_WORD_PROPERTY_HAS_ANYTHING_BUT_TRANSITION); } } } DeLOG("\tSuccessfully set value for index in _RangedTransitionsByCategory\n"); } else if (isAnythingBut) { if (_anythingButTransition != nullptr) { DeLOG("Ooops, somehow we managed to define two 'anythingBut' transitions, this almost certainly leads to undefined behaviour. 
Quitting.\n"); exit(1); } TransitionInputKey transitionKeyAnythingButUnranged(getId(), input, false, true, false); std::pair<TransitionInputKey, ScanWordNode*>* transitionKeyAndValue = new std::pair<TransitionInputKey, ScanWordNode*>(transitionKeyAnythingButUnranged, nextScanWordNode); _anythingButTransition = transitionKeyAndValue; addProperty(_properties, ScanWordProperties_t::SCAN_WORD_PROPERTY_HAS_ANYTHING_BUT_TRANSITION); } else { std::cout << "\tisRangedTransition=false" << std::endl; TransitionInputKey transitionMapKey(getId(), input, false, false, true); std::pair<TransitionInputKey, ScanWordNode*> transitionMapKeyAndValue{ transitionMapKey, nextScanWordNode }; transitionMap->emplace(transitionMapKeyAndValue); } } _lexerDfa = nullptr; //we don't need lexerDfa anymore. todo: after scanwords are made lexerDfas not needed at all }
// Destructor: deletes the object pointed to by _wordConstructor and logs
// that the configuration has been torn down.
~lexer_configuration()
{
    // Deleting a null pointer is a no-op, so no guard is needed.
    delete _wordConstructor;
    DeLOG("Successfully Deleted Lexer Configuration\n");
}
/**
 * Splits one annotated-data entry produced by the lexer into its keyword
 * name and its content.
 *
 * The entry is tokenized with strtok():
 *   - the text up to the first ':' (minus its first character) is the
 *     keyword name;
 *   - for "data_line" the content is delimited by '}';
 *   - for "key_word" the text up to '=' is skipped, the content is
 *     delimited by '>' and the character right after '=' is dropped;
 *   - for any other name the remainder of the entry is taken as one token.
 *
 * @param annotatedData the entry to parse; it is not modified.
 * @return (keyword name, content); ("invalid", "") when the entry does not
 *         match the expected syntax.
 *
 * Note: strtok() keeps hidden static state, so this function is not
 * re-entrant and must not be interleaved with other strtok() users.
 */
std::pair<std::string, std::string> getKeywordNameAndContentFromAnnotatedData(const std::string annotatedData)
{
    // strtok() writes NUL bytes into the text it scans, so work on a private
    // copy. A std::string always carries a terminating NUL after its last
    // character, which the previous fixed-size stack array had no room for
    // (and which did not exist at all for an empty entry).
    std::string buffer(annotatedData);

    char* token = strtok(&buffer[0], ":");
    if (token == nullptr) {
        DeLOG("Error: Ooops. It looks like the grammar module is expecting a syntax different from the annotated output of lexer\n");
        return std::make_pair("invalid", "");
    }

    // The first token consists of one leading character followed by the
    // keyword name; a single character therefore means an empty name.
    if (strlen(token) == 1) {
        DeLOG("We shouldn't have empty string keyword names.\n");
        return std::make_pair("invalid", "");
    }

    const std::string keywordName(token + 1);
    const bool isKeyWord = keywordName.compare("key_word") == 0;

    std::string endOfContentDelimeter;
    if (keywordName.compare("data_line") == 0) {
        DeLOG("\tIdentified data_line\n");
        endOfContentDelimeter = "}";
    } else if (isKeyWord) {
        DeLOG("\tIdentified keyword\n");
        token = strtok(nullptr, "=");
        if (token == nullptr) {
            DeLOG("Error: Ooops. It looks like the grammar module is expecting a syntax different from the annotated output of lexer\n");
            return std::make_pair("invalid", "");
        }
        endOfContentDelimeter = ">";
    }

    // Check to see if there are any more '}'s. Because '{' is allowed within
    // the data part, this needs to be covered.
    token = strtok(nullptr, endOfContentDelimeter.c_str());
    if (token == nullptr) {
        DeLOG("Error: Ooops. It looks like the grammar module is expecting a syntax different from the annotated output of lexer");
        return std::make_pair("invalid", "");
    }

    if (isKeyWord && strlen(token) > 1) {
        // Takes care of the leading '<' immediately after the '=' sign.
        token = token + 1;
    }

    // Tokens are appended two steps behind the scan position, so that the
    // final token (expected to be the closing '}') can be left out.
    std::string content;
    std::string lastToken;
    std::string secondLastToken;
    size_t count = 0;
    do {
        content.append(secondLastToken);
        if (isKeyWord && count > 1 && strcmp("}", token) != 0) {
            // Restore a delimiter that strtok() consumed inside the content.
            content.append(endOfContentDelimeter);
        }
        secondLastToken = lastToken;
        lastToken = std::string(token);
        token = strtok(nullptr, endOfContentDelimeter.c_str());
        count++;
    } while (token != nullptr);

    if (lastToken.compare("}") == 0) {
        if (isKeyWord && count > 1) {
            content.append(endOfContentDelimeter);
        }
    }
    content.append(secondLastToken);

    // The last token should be the '}'.
    if (secondLastToken.empty()) {
        content.append(lastToken);
    } else if (lastToken.compare("}") != 0) {
        DeLOG("Warning: The last character in annotated data is expected to be '}'. It appears not to be (grammar module), you should deal wth this.\n");
    }

    return std::make_pair(keywordName, content);
}
PhasedExecution::PhasedExecution() : _contextManager(nullptr) { DeLOG("PhasedExecution::PhasedExecution()\n"); }
//merges dfas to one dfa for traversal lexer_word_repr* lexer_dfa_builder::mergeDfas(const std::vector<lexer_word_repr*>* const words, DfaManager& dfaManager) const { lexer_word_repr* start = dfaManager.createLexerWordRepr(); //So each JOB consists of: // 1) a ptr to dfa node where we left off in mergeToWord // 2) a transition that we suspect is placeable in mergetToWord // 3) a vector of dfa nodes already visited in mergeTo, this prevents folding the fromDfa back "itself" // but in reality there should be at path dinstinguishing mergeFrom from mergeTo that doesn't visit // a node in MergeTo twice. This only makes sense, because the "going back itself" thing is strictly for // kleen closure like behaviour on runtime. The minimal "description" of automata is actually constant // and doesn't need to have two nodes twice. // repeated. auto jobQueue = new std::vector<std::vector<std::tuple<lexer_dfa*, LexerTransition, std::vector<lexer_dfa*>>>*>(); for (int i = 0; i < words->size(); i++) { auto jobVector = new std::vector<std::tuple<lexer_dfa*, LexerTransition, std::vector<lexer_dfa*>>>(); jobQueue->push_back(jobVector); } std::cout << std::endl << "Merging dfas to one, words size: " << words->size() << std::endl; //iterate through words int jobLineIndex = 0; for (int i =0; i < words->size(); i++) { lexer_dfa* word = words->at(i); std::cout << word << std::endl; std::cout << word->getId() << std::endl; lexer_dfa* mergeToDfaPtr = start; lexer_dfa* mergeFromDfaPtr = word; std::vector<LexerTransition> nextTransitions = mergeFromDfaPtr->getTransitions(); DeLOG("Getting transitions for word\n") for (auto transition : nextTransitions) { DeLOG("Adding job to job Queue\n"); std::tuple<lexer_dfa*, LexerTransition, std::vector<lexer_dfa*>> job(mergeToDfaPtr, transition, std::vector<lexer_dfa*>{}); (jobQueue->at(jobLineIndex))->push_back(job); } jobLineIndex++; } DeLOG(std::string("\nMerge Process: ").append(std::to_string(jobQueue->size())).append(" job lines(queues) 
total\n").c_str()); for (jobLineIndex = 0; jobLineIndex < jobQueue->size(); jobLineIndex++) { DeLOG(std::string("Processing Job line #").append(std::to_string(jobLineIndex+1)).append("\n").c_str()); auto jobVector = jobQueue->at(jobLineIndex); while (jobVector->size() != 0) { DeLOG(std::string("\nThere are ").append(std::to_string(jobVector->size())).append(" in job vector. Processing Job #").append(std::to_string(jobLineIndex + 1)).append("\n").c_str()); auto currJobTuple = jobVector->back(); jobVector->pop_back(); auto currMergeToDfaPtr = std::get<0>(currJobTuple); //here we'll check the currMergeToDfaPtr against the previously visited ptrs in MergeTo //if we've already visited it, we know to ignore it this transition (and NOT to put job back in queue) auto ptrsInMergeToAlreadyVisited = std::get<2>(currJobTuple); auto skipAndContinue = false; for (auto mergeToDfaPtrVisited : ptrsInMergeToAlreadyVisited) { if (currMergeToDfaPtr == mergeToDfaPtrVisited) { skipAndContinue = true; break; } } if (skipAndContinue) { continue; } ptrsInMergeToAlreadyVisited.push_back(currMergeToDfaPtr); auto transitionFromCurrMergeFromDfaPtr = std::get<1>(currJobTuple); auto nextMergeFromDfaPtr = transitionFromCurrMergeFromDfaPtr.getDfaNode(); const auto si = transitionFromCurrMergeFromDfaPtr.getStateAndInput(); DeLOG(std::string{"si = {"}.append(std::to_string(si.getState())).append(1, si.getInput()).append("}\n").c_str()); auto nextMergeToDfaPtrCandidateInfo1 = currMergeToDfaPtr->getNextDfaForInput(si.getInput(), false); auto nextMergeToDfaPtrCandidateProperties1 = nextMergeToDfaPtrCandidateInfo1.second; auto nextMergeToDfaPtrCandidateInfo2 = currMergeToDfaPtr->getNextDfaForInput(si.getInput(), true); auto nextMergeToDfaPtrCandidateProperties2 = nextMergeToDfaPtrCandidateInfo2.second; //aka mergeFromDfaProperties const auto currentMergeFromTransitionProperties = transitionFromCurrMergeFromDfaPtr.getProperties(); if (checkForProperty(currentMergeFromTransitionProperties, 
Lexer_Dfa_Properties::ISA_PUSH_DOWN_CONTINUANCE) || checkForProperty(currentMergeFromTransitionProperties, Lexer_Dfa_Properties::ISA_PUSH_DOWN_EJECT)) { //If the transition ~to~ current ~mergeFrom~ dfa has the // properties: PUSH_DOWN_EJECT or PUSH_DOWN_CONTINUANCE, // then we assume we are in a state of recursion. If the // recursive pathway in ~mergeTo~ dfa is not taken, // nextMergeaToDfaPtrCandidatePropertes is 0x0, then we // add the current ~mergeFrom~ dfa (noting that it is for a // recursion (stack-count > 0) to the currMergeToDfa. // If the position is filled, properties != 0x0 in dfaInfo // when we query with param indicating stackCount > 0, // then we push the ~nextMergeTo~ dfa into thejobQueue // (along with the properties of this is not already being // done (like w\ the entire transition)) lexer_dfa* nextMergeToDfaPtr = nullptr; if (nextMergeToDfaPtrCandidateProperties2 != 0x0) { nextMergeToDfaPtr = nextMergeToDfaPtrCandidateInfo2.first; } currMergeToDfaPtr->_printTransitions(); std::cout << "is there nextDfaPtr? " << (nextMergeToDfaPtr != nullptr ? 
"yes" : "no") << nextMergeToDfaPtr << std::endl; if (nextMergeToDfaPtr == nullptr) { LexerStateAndInput aLexerStateAndInput = transitionFromCurrMergeFromDfaPtr.getStateAndInput(); DeLOG(std::string{"::adding transition(["}.append(std::to_string(aLexerStateAndInput.getState())).append(", '").append(1, aLexerStateAndInput.getInput()).append("']->").append(1, nextMergeFromDfaPtr->getId()).append(") to dfa(").append(1, currMergeToDfaPtr->getId()).append(")\n").c_str()); StateAndInput<int,char> aStateAndInput(aLexerStateAndInput.getState(), aLexerStateAndInput.getInput(), transitionFromCurrMergeFromDfaPtr.getIsRanged()); currMergeToDfaPtr->add_next_dfa(aStateAndInput, nextMergeFromDfaPtr, currentMergeFromTransitionProperties); //Sanity check auto nextMergeToDfaPtrCandidateInfoSanity = currMergeToDfaPtr->getNextDfaForInput(si.getInput(), true); auto nextMergeToDfaPtrCandidatePropertiesSanity = nextMergeToDfaPtrCandidateInfoSanity.second; if (nextMergeToDfaPtrCandidateInfoSanity.first == nullptr) { perror("\nCould not find Node. 
Exiting.\n"); exit(EXIT_FAILURE); } } else { std::vector<LexerTransition> nextTransitions = nextMergeFromDfaPtr->getTransitions(); for (auto transitionFromNextMergeFromDfa : nextTransitions) { const auto si = transitionFromNextMergeFromDfa.getStateAndInput(); DeLOG(std::string{"Couldn't find an opening, pushing back job { to add ("}.append(std::to_string(si.getState())).append(",").append(1, si.getInput()).append(") from dfa-id(").append(std::to_string(nextMergeToDfaPtr->getId())).append(")\n").c_str()); std::tuple<lexer_dfa*, LexerTransition, std::vector<lexer_dfa*>> job(const_cast<lexer_dfa*>(nextMergeToDfaPtr), transitionFromNextMergeFromDfa, ptrsInMergeToAlreadyVisited); jobVector->push_back(job); } } } else if (checkForProperty(currentMergeFromTransitionProperties, Lexer_Dfa_Properties::ISA_NORMAL) || checkForProperty(currentMergeFromTransitionProperties, Lexer_Dfa_Properties::ISA_PUSH_DOWN_ACTIVATOR)) { //Likewise if the transition ~to~ current ~mergeFrom~ dfa // has the properties: NORMAL or PUSH_DOWN_ACTIVATOR, // then we assume then we assume a non-recursive (even if // by the top top level lexer perspective this has yet to // be determined. If the non-recursive pathway in ~mergeTo~ // dfa is not taken, then we add the current ~mergeTo~ dfa // (noting that it is for a recursion (stack-count > 0). // If the position is filled (non-nullptr) dfa result for // query with stackCount > 0 param, then we push the // ~nextMergeTo~ into thejobQueue. lexer_dfa* nextMergeToDfaPtr = nullptr; if (nextMergeToDfaPtrCandidateProperties1 != 0x0) { nextMergeToDfaPtr = nextMergeToDfaPtrCandidateInfo1.first; } currMergeToDfaPtr->_printTransitions(); std::cout << "is there nextDfaPtr? " << (nextMergeToDfaPtr != nullptr ? 
"yes" : "no") << nextMergeToDfaPtr << std::endl; if (nextMergeToDfaPtr == nullptr) { LexerStateAndInput aLexerStateAndInput = transitionFromCurrMergeFromDfaPtr.getStateAndInput(); DeLOG(std::string{"::adding transition(["}.append(std::to_string(aLexerStateAndInput.getState())).append(", '").append(1, aLexerStateAndInput.getInput()).append("']->").append(1, nextMergeFromDfaPtr->getId()).append(") to dfa(").append(1, currMergeToDfaPtr->getId()).append(")\n").c_str()); StateAndInput<int,char> aStateAndInput(aLexerStateAndInput.getState(), aLexerStateAndInput.getInput(), transitionFromCurrMergeFromDfaPtr.getIsRanged()); currMergeToDfaPtr->add_next_dfa(aStateAndInput, nextMergeFromDfaPtr, currentMergeFromTransitionProperties); //Sanity check auto nextMergeToDfaPtrCandidateInfoSanity = currMergeToDfaPtr->getNextDfaForInput(si.getInput(), false); auto nextMergeToDfaPtrCandidatePropertiesSanity = nextMergeToDfaPtrCandidateInfoSanity; if (nextMergeToDfaPtrCandidateInfoSanity.first == nullptr) { perror("\nYeah, this is bad. After we just added our new transition to merged rep, we can't query for it. The effect of adding a new transition should be immediate (I don't know why it should ever not be...). 
Exiting.\n"); exit(EXIT_FAILURE); } } else { std::vector<LexerTransition> nextTransitions = nextMergeFromDfaPtr->getTransitions(); for (auto transitionFromNextMergeFromDfa : nextTransitions) { const auto si = transitionFromNextMergeFromDfa.getStateAndInput(); DeLOG(std::string{"Couldn't find an opening, pushing back job { to add ("}.append(std::to_string(si.getState())).append(",").append(1, si.getInput()).append(") from dfa-id(").append(std::to_string(nextMergeToDfaPtr->getId())).append(")\n").c_str()); std::tuple<lexer_dfa*, LexerTransition, std::vector<lexer_dfa*>> job(const_cast<lexer_dfa*>(nextMergeToDfaPtr), transitionFromNextMergeFromDfa, ptrsInMergeToAlreadyVisited); jobVector->push_back(job); } } } else { std::cout << "Undefined language specification: duplicate lexer words?" << std::endl; exit(1); } } delete jobVector; } delete jobQueue; std::cout << "Finished jobs!" << std::endl << std::endl; return start; }
const lexer_dfa* lexer_dfa::getNextDfa(const LexerStateAndInput& lexerStateAndInput) const { const StateAndInput<int,char> stateAndInput(lexerStateAndInput.getState(), lexerStateAndInput.getInput(), false); DONT _printInputHash(stateAndInput, "stateAndInput"); DeLOG(std::string{"\t_nextStates::size = "}.append(std::to_string(_nextStates.size())).append("\n").c_str()); DONT _printTransitions(); lexer_dfa* ret; std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetched = _nextStates.find(stateAndInput); //if we can't find anything, it may be possible we enountered //the special "ranged" stateAndInput - which is guaranteed to be mapped //to a unique index in hashmap (its in the formulae) if (fetched == _nextStates.end()) { ret = nullptr; char input = stateAndInput.getInput(); if (input == '0') { StateAndInput<int,char> rangedInput(stateAndInput.getState(), SI_NUMBERS_0, true); DONT std::cout << "\t\t"; DONT _printInputHash(rangedInput, "rangedInput"); std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedNumbers0 = _nextStates.find(rangedInput); if (fetchedNumbers0 != _nextStates.end()) { DONT std::cout << "\tFound rangedNumber! 
(0)" << std::endl; ret = fetchedNumbers0->second; } else { StateAndInput<int,char> rangedInput2(stateAndInput.getState(), SI_NUMBERS_0to9, true); DONT std::cout << "\t\t"; DONT _printInputHash(rangedInput2, "SI_NUMBERS_0to9"); std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedNumbers0to9 = _nextStates.find(rangedInput2); if (fetchedNumbers0to9 != _nextStates.end()) { DONT std::cout << "\trangedNumber:[0-9]" << std::endl; ret = fetchedNumbers0to9->second; } } } else if (input >= '1' && input <= '9') { StateAndInput<int,char> rangedInput0to9(stateAndInput.getState(), SI_NUMBERS_0to9, true); DONT std::cout << "\t\t"; DONT _printInputHash(rangedInput0to9, "rangedInputNumbers0to9"); std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedNumbers0to9 = _nextStates.find(rangedInput0to9); if (fetchedNumbers0to9 != _nextStates.end()) { DONT std::cout << "\tFound rangedNumber! 
([0-9])" << std::endl; ret = fetchedNumbers0to9->second; } else { StateAndInput<int,char> rangedInput1to9(stateAndInput.getState(), SI_NUMBERS_1to9, true); DONT std::cout << "\t\t"; DONT _printInputHash(rangedInput1to9, "rangedInputNumbers1to9"); std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedNumbers1to9 = _nextStates.find(rangedInput1to9); if (fetchedNumbers1to9 != _nextStates.end()) { DONT std::cout << "\trangedNumber:[1-9]" << std::endl; ret = fetchedNumbers1to9->second; } } } else if (input >= 'a' && input <= 'z') { DONT std::cout << "\tChecking lowerase ranged" << std::endl; StateAndInput<int,char> rangedInput(stateAndInput.getState(), SI_CHARS_LOWER, true); DONT std::cout << "\t\t"; DONT _printInputHash(rangedInput, "rangedInput"); DONT std::cout << "\t\tlexer_dfa::getNextState(...): (state,input) = (" << stateAndInput.getState() << ", SI_CHARS_LOWER)" << std::endl; std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedCharsLower = _nextStates.find(rangedInput); if (fetchedCharsLower != _nextStates.end()) { DONT std::cout << "\tFound rangedChars:[a-z]" << std::endl; ret = fetchedCharsLower->second; } else { StateAndInput<int,char> rangedInput2(stateAndInput.getState(), SI_CHARS_ANY, true); DONT std::cout << "\t\t"; DONT _printInputHash(rangedInput2, "rangedInput2"); DONT std::cout << "\t\tlexer_dfa::getNextState(...): (state,input) = (" << stateAndInput.getState() << ", SI_CHARS_ANY)" << std::endl; std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedCharsAny = _nextStates.find(rangedInput2); if (fetchedCharsAny != _nextStates.end()) { DONT std::cout << "rangedChars:([a-z]|[A-Z])" << std::endl; ret = fetchedCharsAny->second; } } } else if (input >= 'A' && input <= 'Z') { DONT std::cout << "\tChecking uppercase ranged" << std::endl; 
StateAndInput<int,char> rangedInput(stateAndInput.getState(), SI_CHARS_UPPER, true); DONT std::cout << "\t\t"; DONT _printInputHash(rangedInput, "rangedInput"); DONT std::cout << "\t\tlexer_dfa::getNextState(...): (state,input) = (" << stateAndInput.getState() << ", SI_CHARS_UPPER)" << std::endl; std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedCharsUpper = _nextStates.find(rangedInput); if (fetchedCharsUpper != _nextStates.end()) { //std::cout << "\tFound rangedChars:[A-Z]" << std::endl; //commented in order to benchmark diff between ScanWords ret = fetchedCharsUpper->second; } else { StateAndInput<int,char> rangedInput2(stateAndInput.getState(), SI_CHARS_ANY, true); DONT std::cout << "\t\t"; DONT _printInputHash(rangedInput, "rangedInput"); DONT std::cout << "\t\tlexer_dfa::getNextState(...): (state,input) = (" << stateAndInput.getState() << ", SI_CHARS_ANY)" << std::endl; std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedCharsAny = _nextStates.find(rangedInput2); if (fetchedCharsAny != _nextStates.end()) { DONT std::cout << "rangedChars:([a-z]|[A-Z])" << std::endl; ret = fetchedCharsAny->second; } } } //if by now ret has not been set to soemething other than nullptr, we have one last restort in the empty char if (ret == nullptr) { //we check for 'anythingBut' before we finally check for empty string -- this is the going protocol for now if (_anythingButTransition != nullptr) { if (_anythingButTransition->getIsRanged()) { //todo: perform range checks for anything buts } else if (input != _anythingButTransition->getStateAndInput().getInput()) { ret = const_cast<lexer_dfa*>(_anythingButTransition->getDfaNode()); } } if (ret == nullptr) { //check case of empty char StateAndInput<int,char> stateAndEmptyCharInput(stateAndInput.getState(), '\0'); DONT std::cout << "\t\t"; DONT _printInputHash(stateAndEmptyCharInput, 
"stateAndEmptyInput"); std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedEmptyChar = _nextStates.find(stateAndEmptyCharInput); if (fetchedEmptyChar != _nextStates.end()) { DONT std::cout << "\tfound empty char!!!" << std::endl; ret = fetchedEmptyChar->second; } else { DONT std::cout << "\tkey not found" << std::endl; ret = nullptr; } } } } else { ret = fetched->second; } return ret; }