Пример #1
0
/// @brief Merges the DFAs of all given lexer words into one DFA for traversal.
///
/// A fresh start node is obtained from @p dfaManager and every word is folded
/// into it, one word ("job line") at a time. A job consists of:
///   1) the node of the merged (merge-to) DFA where merging left off,
///   2) a transition of the word (merge-from) DFA that may be placeable there,
///   3) the merge-to nodes already visited on this path; a job that arrives
///      at a node it has visited before is dropped, so a looping word
///      (e.g. a Kleene closure) is never folded back onto itself.
///
/// If the merged DFA has no transition for the job's input, the word's
/// transition (and the sub-graph behind it) is attached to the merged DFA.
/// Otherwise the outgoing transitions of the word's next node are queued
/// against the node the merged DFA already leads to.
///
/// @param words       the word DFAs to merge; must not be null.
/// @param dfaManager  factory used to create the start node of the merged DFA.
/// @return the start node of the merged DFA.
/// @note Terminates the process via exit() when a transition carries none of
///       the handled properties, or when a freshly added transition cannot be
///       looked up again.
lexer_word_repr* lexer_dfa_builder::mergeDfas(const std::vector<lexer_word_repr*>* const words, DfaManager& dfaManager) const
{
    using MergeJob = std::tuple<lexer_dfa*, LexerTransition, std::vector<lexer_dfa*>>;

    lexer_word_repr* start = dfaManager.createLexerWordRepr();

    // One job line per word. The lines are owned by value, so nothing has to
    // be released by hand on any path out of this function.
    std::vector<std::vector<MergeJob>> jobQueue(words->size());

    std::cout << std::endl << "Merging dfas to one, words size: " << words->size() << std::endl;

    // Seed every job line with the outgoing transitions of its word's start
    // node, each paired with the start node of the merged DFA.
    for (std::size_t wordIndex = 0; wordIndex < words->size(); ++wordIndex)
    {
        lexer_dfa* word = words->at(wordIndex);
        std::cout << word << std::endl;
        std::cout << word->getId() << std::endl;

        lexer_dfa* const mergeToDfaPtr = start;
        std::vector<LexerTransition> wordTransitions = word->getTransitions();

        DeLOG("Getting transitions for word\n");

        for (const auto& wordTransition : wordTransitions)
        {
            DeLOG("Adding job to job Queue\n");

            jobQueue[wordIndex].push_back(MergeJob(mergeToDfaPtr, wordTransition, std::vector<lexer_dfa*>{}));
        }
    }

    DeLOG(std::string("\nMerge Process: ").append(std::to_string(jobQueue.size())).append(" job lines(queues) total\n").c_str());

    for (std::size_t jobLineIndex = 0; jobLineIndex < jobQueue.size(); ++jobLineIndex)
    {
        DeLOG(std::string("Processing Job line #").append(std::to_string(jobLineIndex+1)).append("\n").c_str());

        // jobQueue itself is never resized below, so this reference stays valid
        // while follow-up jobs are pushed onto the line.
        std::vector<MergeJob>& jobVector = jobQueue[jobLineIndex];

        while (!jobVector.empty())
        {
            DeLOG(std::string("\nThere are ").append(std::to_string(jobVector.size())).append(" in job vector. Processing Job #").append(std::to_string(jobLineIndex + 1)).append("\n").c_str());

            // Take the job out of the line (LIFO) before it is processed; the
            // local copy keeps its parts alive while new jobs are pushed.
            MergeJob currentJob = jobVector.back();
            jobVector.pop_back();

            lexer_dfa* const currMergeToDfaPtr = std::get<0>(currentJob);
            LexerTransition& mergeFromTransition = std::get<1>(currentJob);
            std::vector<lexer_dfa*>& visitedMergeToDfas = std::get<2>(currentJob);

            // A merge-to node that was already visited on this path means the
            // word loops back; ignore the transition and do NOT requeue it.
            bool alreadyVisited = false;
            for (const lexer_dfa* visitedDfaPtr : visitedMergeToDfas)
            {
                if (visitedDfaPtr == currMergeToDfaPtr)
                {
                    alreadyVisited = true;
                    break;
                }
            }

            if (alreadyVisited)
            {
                continue;
            }

            visitedMergeToDfas.push_back(currMergeToDfaPtr);

            auto nextMergeFromDfaPtr = mergeFromTransition.getDfaNode();
            const auto si = mergeFromTransition.getStateAndInput();

            DeLOG(std::string{"si = {"}.append(std::to_string(si.getState())).append(1, si.getInput()).append("}\n").c_str());

            const auto mergeFromProperties = mergeFromTransition.getProperties();

            // PUSH_DOWN_CONTINUANCE / PUSH_DOWN_EJECT transitions are treated as
            // belonging to a recursion; NORMAL / PUSH_DOWN_ACTIVATOR transitions
            // as non-recursive. The recursive classification wins if both apply.
            const bool isRecursive =
                checkForProperty(mergeFromProperties, Lexer_Dfa_Properties::ISA_PUSH_DOWN_CONTINUANCE)
                || checkForProperty(mergeFromProperties, Lexer_Dfa_Properties::ISA_PUSH_DOWN_EJECT);

            if (!isRecursive
                && !(checkForProperty(mergeFromProperties, Lexer_Dfa_Properties::ISA_NORMAL)
                     || checkForProperty(mergeFromProperties, Lexer_Dfa_Properties::ISA_PUSH_DOWN_ACTIVATOR)))
            {
                std::cout << "Undefined language specification: duplicate lexer words?" << std::endl;
                exit(1);
            }

            // Query the merged DFA on the matching pathway. Per the original
            // notes, the flag selects the lookup for "stack count > 0"
            // (recursive) versus the plain lookup. A result whose properties
            // are 0x0 counts as "no transition present".
            auto candidateInfo = currMergeToDfaPtr->getNextDfaForInput(si.getInput(), isRecursive);

            lexer_dfa* nextMergeToDfaPtr = nullptr;
            if (candidateInfo.second != 0x0)
            {
                nextMergeToDfaPtr = candidateInfo.first;
            }

            currMergeToDfaPtr->_printTransitions();
            std::cout << "is there nextDfaPtr? " << (nextMergeToDfaPtr != nullptr ? "yes" : "no")
                      << nextMergeToDfaPtr << std::endl;

            if (nextMergeToDfaPtr == nullptr)
            {
                // The slot is free: attach the word's transition to the merged DFA.
                DeLOG(std::string{"::adding transition(["}.append(std::to_string(si.getState())).append(", '").append(1, si.getInput()).append("']->").append(std::to_string(nextMergeFromDfaPtr->getId())).append(") to dfa(").append(std::to_string(currMergeToDfaPtr->getId())).append(")\n").c_str());

                StateAndInput<int,char> aStateAndInput(si.getState(), si.getInput(), mergeFromTransition.getIsRanged());
                currMergeToDfaPtr->add_next_dfa(aStateAndInput, nextMergeFromDfaPtr, mergeFromProperties);

                // Sanity check: the transition that was just added must be
                // visible to the very same query immediately.
                auto sanityInfo = currMergeToDfaPtr->getNextDfaForInput(si.getInput(), isRecursive);
                if (sanityInfo.first == nullptr)
                {
                    // Plain stderr output: perror() would append an unrelated
                    // errno description to these messages.
                    if (isRecursive)
                    {
                        std::cerr << "\nCould not find Node. Exiting.\n";
                    }
                    else
                    {
                        std::cerr << "\nYeah, this is bad. After we just added our new transition to merged rep, we can't query for it. The effect of adding a new transition should be immediate (I don't know why it should ever not be...). Exiting.\n";
                    }
                    exit(EXIT_FAILURE);
                }
            }
            else
            {
                // The slot is taken: descend one level and try to place each
                // outgoing transition of the word's next node on the node the
                // merged DFA already leads to.
                std::vector<LexerTransition> followUpTransitions = nextMergeFromDfaPtr->getTransitions();

                for (auto& followUpTransition : followUpTransitions)
                {
                    const auto followUpSi = followUpTransition.getStateAndInput();
                    DeLOG(std::string{"Couldn't find an opening, pushing back job { to add ("}.append(std::to_string(followUpSi.getState())).append(",").append(1, followUpSi.getInput()).append(") from dfa-id(").append(std::to_string(nextMergeToDfaPtr->getId())).append(")\n").c_str());

                    jobVector.push_back(MergeJob(nextMergeToDfaPtr, followUpTransition, visitedMergeToDfas));
                }
            }
        }
    }

    std::cout << "Finished jobs!" << std::endl << std::endl;

    return start;
}
Пример #2
0
/// @brief Looks up the node reached from this node for the given state and input.
///
/// Lookup order:
///   1) the exact (state, input) key;
///   2) a "ranged" key chosen by the class of the input character:
///        '0'       -> SI_NUMBERS_0,    then SI_NUMBERS_0to9
///        '1'..'9'  -> SI_NUMBERS_0to9, then SI_NUMBERS_1to9
///        'a'..'z'  -> SI_CHARS_LOWER,  then SI_CHARS_ANY
///        'A'..'Z'  -> SI_CHARS_UPPER,  then SI_CHARS_ANY
///   3) the non-ranged "anything but" transition, if the input differs from
///      its character;
///   4) the empty-char ('\0') key for the same state.
///
/// @param lexerStateAndInput  current state and input character.
/// @return the mapped node of the first key that is present in _nextStates
///         (which may itself be null for steps 1 and 2), or nullptr when
///         nothing matches.
const lexer_dfa* lexer_dfa::getNextDfa(const LexerStateAndInput& lexerStateAndInput) const
{
    const StateAndInput<int,char> stateAndInput(lexerStateAndInput.getState(), 
        lexerStateAndInput.getInput(), false);

    DONT _printInputHash(stateAndInput, "stateAndInput");
    DeLOG(std::string{"\t_nextStates::size = "}.append(std::to_string(_nextStates.size())).append("\n").c_str());
    DONT _printTransitions();

    // 1) An exact hit wins outright.
    const auto exactMatch = _nextStates.find(stateAndInput);
    if (exactMatch != _nextStates.end())
    {
        return exactMatch->second;
    }

    // 2) No exact hit: the transition may have been registered under one of
    //    the special "ranged" keys, which hash to their own slots in the map.
    lexer_dfa* ret = nullptr;
    const char input = stateAndInput.getInput();

    if (input == '0')
    {
        StateAndInput<int,char> zeroOnlyKey(stateAndInput.getState(), SI_NUMBERS_0, true);

        DONT std::cout << "\t\t";
        DONT _printInputHash(zeroOnlyKey, "rangedInput");

        const auto zeroOnlyMatch = _nextStates.find(zeroOnlyKey);

        if (zeroOnlyMatch != _nextStates.end())
        {
            DONT std::cout << "\tFound rangedNumber! (0)" << std::endl;
            ret = zeroOnlyMatch->second;
        }
        else
        {
            StateAndInput<int,char> anyDigitKey(stateAndInput.getState(), SI_NUMBERS_0to9, true);
            DONT std::cout << "\t\t";
            DONT _printInputHash(anyDigitKey, "SI_NUMBERS_0to9"); 

            const auto anyDigitMatch = _nextStates.find(anyDigitKey);

            if (anyDigitMatch != _nextStates.end())
            {
                DONT std::cout << "\trangedNumber:[0-9]" << std::endl; 
                ret = anyDigitMatch->second;
            }
        }
    }
    else if (input >= '1' && input <= '9')
    {
        StateAndInput<int,char> anyDigitKey(stateAndInput.getState(), SI_NUMBERS_0to9, true);

        DONT std::cout << "\t\t"; 
        DONT _printInputHash(anyDigitKey, "rangedInputNumbers0to9");

        const auto anyDigitMatch = _nextStates.find(anyDigitKey);

        if (anyDigitMatch != _nextStates.end())
        {
            DONT std::cout << "\tFound rangedNumber! ([0-9])" << std::endl; 
            ret = anyDigitMatch->second;
        }
        else
        {
            StateAndInput<int,char> nonZeroDigitKey(stateAndInput.getState(), SI_NUMBERS_1to9, true);
            DONT std::cout << "\t\t"; 
            DONT _printInputHash(nonZeroDigitKey, "rangedInputNumbers1to9");

            const auto nonZeroDigitMatch = _nextStates.find(nonZeroDigitKey);

            if (nonZeroDigitMatch != _nextStates.end())
            {
                DONT std::cout << "\trangedNumber:[1-9]" << std::endl; 
                ret = nonZeroDigitMatch->second;
            }
        }
    }
    else if (input >= 'a' && input <= 'z')
    {
        DONT std::cout << "\tChecking lowerase ranged" << std::endl; 

        StateAndInput<int,char> lowerCaseKey(stateAndInput.getState(), SI_CHARS_LOWER, true);

        DONT std::cout << "\t\t";
        DONT _printInputHash(lowerCaseKey, "rangedInput"); 

        DONT std::cout << "\t\tlexer_dfa::getNextState(...): (state,input) = (" << stateAndInput.getState() << ", SI_CHARS_LOWER)" << std::endl;

        const auto lowerCaseMatch = _nextStates.find(lowerCaseKey);

        if (lowerCaseMatch != _nextStates.end())
        {
            DONT std::cout << "\tFound rangedChars:[a-z]" << std::endl; 
            ret = lowerCaseMatch->second;
        }
        else
        {
            StateAndInput<int,char> anyLetterKey(stateAndInput.getState(), SI_CHARS_ANY, true);

            DONT std::cout << "\t\t"; 
            DONT _printInputHash(anyLetterKey, "rangedInput2"); 

            DONT std::cout << "\t\tlexer_dfa::getNextState(...): (state,input) = (" << stateAndInput.getState() << ", SI_CHARS_ANY)" << std::endl; 

            const auto anyLetterMatch = _nextStates.find(anyLetterKey);

            if (anyLetterMatch != _nextStates.end())
            {
                DONT std::cout << "rangedChars:([a-z]|[A-Z])" << std::endl;
                ret = anyLetterMatch->second;
            }
        }
    }
    else if (input >= 'A' && input <= 'Z')
    {
        DONT std::cout << "\tChecking uppercase ranged" << std::endl; 

        StateAndInput<int,char> upperCaseKey(stateAndInput.getState(), SI_CHARS_UPPER, true);

        DONT std::cout << "\t\t";
        DONT _printInputHash(upperCaseKey, "rangedInput"); 

        DONT std::cout << "\t\tlexer_dfa::getNextState(...): (state,input) = (" << stateAndInput.getState() << ", SI_CHARS_UPPER)" << std::endl;

        const auto upperCaseMatch = _nextStates.find(upperCaseKey);

        if (upperCaseMatch != _nextStates.end())
        {
            //std::cout << "\tFound rangedChars:[A-Z]" << std::endl; //commented in order to benchmark diff between ScanWords
            ret = upperCaseMatch->second;
        }
        else
        {
            StateAndInput<int,char> anyLetterKey(stateAndInput.getState(), SI_CHARS_ANY, true);

            // Print the key that is actually looked up next (the SI_CHARS_ANY
            // one), not the upper-case key that has just missed.
            DONT std::cout << "\t\t";
            DONT _printInputHash(anyLetterKey, "rangedInput2"); 

            DONT std::cout << "\t\tlexer_dfa::getNextState(...): (state,input) = (" << stateAndInput.getState() << ", SI_CHARS_ANY)" << std::endl;

            const auto anyLetterMatch = _nextStates.find(anyLetterKey);

            if (anyLetterMatch != _nextStates.end())
            {
                DONT std::cout << "rangedChars:([a-z]|[A-Z])" << std::endl;
                ret = anyLetterMatch->second;
            }
        }
    }

    // 3) 'anythingBut' is checked before the empty char -- this is the going
    //    protocol for now.
    //    todo: perform range checks for ranged anything-buts (a ranged
    //    anything-but transition currently never matches).
    if (ret == nullptr && _anythingButTransition != nullptr)
    {
        if (!_anythingButTransition->getIsRanged()
            && input != _anythingButTransition->getStateAndInput().getInput())
        {
            ret = const_cast<lexer_dfa*>(_anythingButTransition->getDfaNode());
        }
    }

    // 4) Last resort: a transition on the empty char.
    if (ret == nullptr)
    {
        StateAndInput<int,char> stateAndEmptyCharInput(stateAndInput.getState(), '\0');

        DONT std::cout << "\t\t";
        DONT _printInputHash(stateAndEmptyCharInput, "stateAndEmptyInput"); 

        const auto emptyCharMatch = _nextStates.find(stateAndEmptyCharInput);

        if (emptyCharMatch != _nextStates.end())
        {
            DONT std::cout << "\tfound empty char!!!" << std::endl;
            ret = emptyCharMatch->second;
        }
        else
        {
            DONT std::cout << "\tkey not found" << std::endl;
        }
    }

    return ret;
}