Example 1
std::string PhasedExecution::execute(int argc, char* argv[])
{
    if (_contextManager != nullptr)
    {
        DeLOG("Destroying Old Context For Another Run.");
        delete _contextManager;
    }

    if (argc < 2)
    {
        perror("Not enough arguments, correct usage: ./repss input_file");

        return "";
    }

    _contextManager = new ContextManager();

    auto lexerContext = _contextManager->getContext<ContextType::AllowedTypes, ContextType::Lexer>();

    std::string inputFileName{argv[1]};
    runLexer(&lexerContext, inputFileName);  

    auto grammarContext = _contextManager->getContext<ContextType::AllowedTypes, ContextType::Grammar>();
    runGrammarAggregation(&grammarContext);

    delete _contextManager;
    _contextManager = nullptr;

    DeLOG("Finished PhasedExecution\n");

    std::string generatedOutput;
    return generatedOutput;
}
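
execute() manages the ContextManager lifetime by hand: it deletes any instance left over from a previous run, allocates a fresh one, and deletes it again on the success path. Below is a minimal sketch of the same lifetime expressed with std::unique_ptr; the stub ContextManager and the surrounding names are illustrative stand-ins, not the project's actual API.

#include <cstdio>
#include <memory>
#include <string>

struct ContextManager {}; //stand-in for the project's ContextManager

class PhasedExecutionSketch
{
public:
    std::string execute(int argc, char* argv[])
    {
        if (argc < 2)
        {
            std::fprintf(stderr, "Not enough arguments, correct usage: ./repss input_file\n");
            return "";
        }

        //assignment destroys any context left over from a previous run before
        //installing the new one; the member's destructor releases the last one
        //automatically, even on early returns or exceptions.
        _contextManager = std::make_unique<ContextManager>();

        // ... run the lexer and grammar phases as execute() does above ...

        _contextManager.reset();
        return "";
    }

private:
    std::unique_ptr<ContextManager> _contextManager;
};
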
Example 2
void GrammarBlockAggregator::processAnnotatedData(IGrammarContext* const context)
{
    DeLOG("Beginning processing of annotated data from Lexer\n");
    DeLOG("-------------------------------------------------\n");
    auto annotatedData = context->getAnnotatedData();

    size_t sizeOfAnnotatedData = annotatedData.size();

    for (size_t index = 0; index < sizeOfAnnotatedData; index++)
    {
        DeLOG(std::string{"Processing annotatedData #"}.append(std::to_string(index)).append("\n").c_str());
        auto dataString = annotatedData.getAt(index);

        //parse string for type
        // - it can be either a keyword or data
        //    data can be recognized as just data
        //    anything else is considered a keyword.
        //      -- currently returns "invalid" for first in std::pair if malformed
        auto keywordNameAndContent = getKeywordNameAndContentFromAnnotatedData(dataString);

        DeLOG(std::string{keywordNameAndContent.first}.append(", ").append(keywordNameAndContent.second).append("\n").c_str());
        //if data, push it to the current Block.

        //if keyword, analyze it.
        // 'analysis' consists of: is this expected? If not, ignore it.
        //   If it's a Variable, we add a new Block as the newest child of the
        //   current line of blocks, and we must note that a corresponding
        //   'closure' is expected later to end that Block.
        //   Everything else (terminals) is added to the current block,
        //   or to the line if not within a block; the system as a whole can
        //   be viewed as one block.

        if (keywordNameAndContent.first.compare("data_line") != 0)
        {

        }
        else
        {
            //there's not much to do here, just add it to whatever the current active grammar block is
        }

        //put grammarDataBlock into lexerGrammarProxy
    }

    auto grammarDataProxy = context->getGrammarDataProxy();
    context->setGrammarBlockAggregate(std::move(_aggregatedBlocks));
}
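
The comments above describe the intended aggregation: data lines are appended to the active block, a Variable keyword opens a nested block, and a matching closure keyword ends it. Below is a minimal, self-contained sketch of that block-stack bookkeeping; the Block type and the keyword strings used here ("data_line", "variable", "closure") are assumptions for illustration, not the project's real types or tokens.

#include <memory>
#include <string>
#include <vector>

struct Block
{
    std::vector<std::string> lines;               //content of data_line entries
    std::vector<std::unique_ptr<Block>> children; //nested blocks
};

class BlockStackAggregator
{
public:
    BlockStackAggregator() { _stack.push_back(&_root); }

    void onPair(const std::string& keyword, const std::string& content)
    {
        Block* current = _stack.back();
        if (keyword == "data_line")
        {
            current->lines.push_back(content); //data goes to whatever block is active
        }
        else if (keyword == "variable")
        {
            current->children.push_back(std::make_unique<Block>());
            _stack.push_back(current->children.back().get()); //open a nested block
        }
        else if (keyword == "closure" && _stack.size() > 1)
        {
            _stack.pop_back(); //the corresponding closure ends the active block
        }
        //every other keyword (terminals) would be appended to the active block
    }

private:
    Block _root;                //the system as a whole viewed as one block
    std::vector<Block*> _stack; //path from the root down to the active block
};
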
Example 3
    void init(const lexer_configuration& config)
    {
        if (_lexerDataProxy != nullptr)
        {
            perror("LexerManager/LexerDataProxy have already been initialized. Doing nothing...");
            return;
        }

        auto scanWordTransitionMap = config.getScanWordTransitionMap();
        auto scanWords = config.getScanWords();
        auto dfaManager = config.getDfaManager();

        _lexerDataProxy = new LexerDataProxy(dfaManager, scanWordTransitionMap, scanWords);
        _context->initLexerDataProxy(static_cast<const ILexerDataProxy*>(_lexerDataProxy));

        DeLOG("Initialized Lexer Data Proxy Successfully.\nSuccessfully Initialized LexerManager\n");
    }
Example 4
//Outside this class (lexer_word_constructor right now), we maintain a list of implicitly/explicitly created
// ScanWordNodes; this ensures we don't explicitly create a ScanWordNode with an existing id.
// If children are needed for an object, we only create them if a reference doesn't already exist for them -
// a node created here or elsewhere need not have been init'd yet.
//     to think about: maybe we can move all this init work into the constructor and make ScanWordNode const?
void ScanWordNode::init(ScanWordTransitionMap* const transitionMap, const std::unordered_set<ScanWordNode*>& existingScanWordNodes, std::vector<ScanWordNode*>& wordsToBeInitd)
{
    if (_lexerDfa == nullptr)
    {
        DeLOG("Error - API Misuse: Either init has already been called, or nullptr was given to ScanWordNode constructor. Both bad.\n");
        exit(1);
    }

    //To handle the case where 2 or more transitions point to the same lexer dfa, we keep locallyCreatedScanWordNodes.
    // eg: 'partially ranged' input (a|b|c)->some_lexer_dfa [1]
    std::vector<ScanWordNode*> locallyCreatedScanWordNodes;

    _id = _lexerDfa->getId();
    auto transitions = _lexerDfa->getTransitions();

    _lexerDfa->_printTransitions();

    for (auto aTransition : transitions)
    {
        const auto stateAndInput = aTransition.getStateAndInput();
        const auto input = stateAndInput.getInput();

        //1) check if the transition points to lexer_dfa for which a ScanWordNode already exists (has same id)

        const auto nextDfa = aTransition.getDfaNode();
        const auto nextDfaId = nextDfa->getId();

        ScanWordNode* nextScanWordNode = nullptr;

        //check locally created scan words first
        for (auto existingScanWordNode : locallyCreatedScanWordNodes)
        {
            if (existingScanWordNode->getId() == nextDfaId)
            {
                nextScanWordNode = existingScanWordNode;
                break;
            }
        }

        //if scan word node was already created locally, we can skip ahead
        if (nextScanWordNode == nullptr)
        {
            //if transition already exists in unordered_set param, use it
            for (auto existingScanWordNode : existingScanWordNodes)
            {
                if (existingScanWordNode->getId() == nextDfaId)
                {
                    nextScanWordNode = existingScanWordNode;
                    break;
                }
            }

            //if it exists in neither the unordered_set param nor the locallyCreatedScanWordNodes container, we make it
            if (nextScanWordNode == nullptr)
            {//if no corresponding ScanWordNode already exists in vector param, create it, place in toBeInitd
                nextScanWordNode = new ScanWordNode(nextDfa);
                wordsToBeInitd.push_back(nextScanWordNode); //the calling function should take care of transferring elements in wordsToBeInitd to existingScanWordNodes
                locallyCreatedScanWordNodes.push_back(nextScanWordNode); //to handle case where two transitions point to same lexer dfa (say with different inputs)
            }
        }

        //2) at this point nextScanWordNode is guaranteed to be set to something (not nullptr)

        const auto isRangedTransition = aTransition.getIsRanged();
        const auto isAnythingBut = aTransition.getIsAnythingBut();        

        if (isRangedTransition)
        {
            DeLOG("\tisRangedTransition=true\n");

            char rangedPossibilities[] = {SI_CHARS_LOWER, SI_CHARS_UPPER, SI_CHARS_ANY, SI_NUMBERS_0, SI_NUMBERS_1to9, SI_NUMBERS_0to9, SI_EMPTY};

            auto rangedInputCategory = stateAndInput.getInput();

            for (auto possibility : rangedPossibilities)
            {
                bool shouldAddToTransitionMap;
                switch (possibility)
                {
                    case SI_EMPTY:
                        shouldAddToTransitionMap = false; //revisit this. keep false. maybe treat in same spirit as _anythingBut
                        break;
                    case SI_CHARS_LOWER:
                        shouldAddToTransitionMap = islower(rangedInputCategory);
                        break;
                    case SI_CHARS_UPPER:
                        shouldAddToTransitionMap = isupper(rangedInputCategory);
                        break;
                    case SI_CHARS_ANY:
                        shouldAddToTransitionMap = isalpha(rangedInputCategory);
                        break;
                    case SI_NUMBERS_0:
                        shouldAddToTransitionMap = rangedInputCategory == '0';
                        break;
                    case SI_NUMBERS_1to9:
                        shouldAddToTransitionMap = (rangedInputCategory >= '1' && rangedInputCategory <= '9');
                        break;
                    case SI_NUMBERS_0to9:
                        shouldAddToTransitionMap = (rangedInputCategory >= '0' && rangedInputCategory <= '9');
                        break;
                    default:
                        shouldAddToTransitionMap = false;
                        break;
                };

                if (shouldAddToTransitionMap)
                {
                    if (!isAnythingBut)
                    {
                        TransitionInputKey transitionMapKeyRanged(getId(), possibility, true, false, true);
                        std::pair<TransitionInputKey, ScanWordNode*> transitionMapKeyAndValue{ transitionMapKeyRanged, nextScanWordNode };
                        transitionMap->emplace(transitionMapKeyAndValue);

                        addProperty(_properties, ScanWordProperties_t::SCAN_WORD_PROPERTY_HAS_RANGED_TRANSITION);
                    }
                    else
                    {
                        if (_anythingButTransition != nullptr)
                        {
                            DeLOG("Oops: somehow we managed to define two 'anythingBut' transitions; this almost certainly leads to undefined behaviour. Quitting.\n");
                            exit(1);
                        }

                        TransitionInputKey transitionKeyAnythingButRange(getId(), possibility, true, true, false);
                        std::pair<TransitionInputKey, ScanWordNode*>* transitionKeyAndValue = new std::pair<TransitionInputKey, ScanWordNode*>(transitionKeyAnythingButRange, nextScanWordNode);
                        _anythingButTransition = transitionKeyAndValue;

                        addProperty(_properties, ScanWordProperties_t::SCAN_WORD_PROPERTY_HAS_ANYTHING_BUT_TRANSITION);
                    }
                }
            }

            DeLOG("\tSuccessfully set value for index in _RangedTransitionsByCategory\n");
        }
        else if (isAnythingBut)
        {
            if (_anythingButTransition != nullptr)
            {
                DeLOG("Oops: somehow we managed to define two 'anythingBut' transitions; this almost certainly leads to undefined behaviour. Quitting.\n");
                exit(1);
            }

            TransitionInputKey transitionKeyAnythingButUnranged(getId(), input, false, true, false);
            std::pair<TransitionInputKey, ScanWordNode*>* transitionKeyAndValue = new std::pair<TransitionInputKey, ScanWordNode*>(transitionKeyAnythingButUnranged, nextScanWordNode);
            _anythingButTransition = transitionKeyAndValue;

            addProperty(_properties, ScanWordProperties_t::SCAN_WORD_PROPERTY_HAS_ANYTHING_BUT_TRANSITION);
        }
        else
        {
            std::cout << "\tisRangedTransition=false" << std::endl;

            TransitionInputKey transitionMapKey(getId(), input, false, false, true);
            std::pair<TransitionInputKey, ScanWordNode*> transitionMapKeyAndValue{ transitionMapKey, nextScanWordNode };
            transitionMap->emplace(transitionMapKeyAndValue);
        }
    }

    _lexerDfa = nullptr; //we don't need the lexerDfa anymore. todo: once ScanWords are built, lexerDfas aren't needed at all
}
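
Before allocating a new ScanWordNode, init() checks two places for an existing node with the same id: the nodes created locally while processing this transition list, then the nodes the caller already knows about. Below is a small, self-contained sketch of that find-or-create order; NodeStub and the container names are illustrative stand-ins, not the project's types.

#include <unordered_set>
#include <vector>

struct NodeStub
{
    explicit NodeStub(int id) : _id(id) {}
    int getId() const { return _id; }
private:
    int _id;
};

NodeStub* findOrCreate(int wantedId,
                       std::vector<NodeStub*>& locallyCreated,
                       const std::unordered_set<NodeStub*>& existing,
                       std::vector<NodeStub*>& toBeInitd)
{
    //1) a node created earlier while processing this same transition list
    for (NodeStub* node : locallyCreated)
        if (node->getId() == wantedId) return node;

    //2) a node the caller already created for another word
    for (NodeStub* node : existing)
        if (node->getId() == wantedId) return node;

    //3) nothing matches: create it and record it in both bookkeeping containers
    NodeStub* created = new NodeStub(wantedId);
    toBeInitd.push_back(created);      //the caller will init and adopt it later
    locallyCreated.push_back(created); //so a second transition to the same id reuses it
    return created;
}
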
Example 5
 ~lexer_configuration() 
 {
     delete _wordConstructor;
     DeLOG("Successfully Deleted Lexer Configuration\n");   
 }
Example 6
std::pair<std::string, std::string> getKeywordNameAndContentFromAnnotatedData(const std::string& annotatedData)
{
    char buffer[annotatedData.size() + 1]; //+1 for the null terminator that strcpy() writes
    strcpy(buffer, annotatedData.c_str());

    char *token = NULL;

    //begin tokenizing
    token = strtok(buffer, ":");

    if (token == NULL)
    {
        DeLOG("Error: Oops. It looks like the grammar module is expecting a syntax different from the annotated output of the lexer\n");
        return std::make_pair("invalid", "");
    }

    std::string content;

    std::string endOfContentDelimeter;

    if (strlen(token) == 1)
    {
        DeLOG("We shouldn't have empty string keyword names.\n");
        return std::make_pair("invalid", "");
    }
    
    std::string keywordName(token+1);
    if (keywordName.compare("data_line") == 0)
    {
        DeLOG("\tIdentified data_line\n");

        endOfContentDelimeter = "}";
    }
    else if (keywordName.compare("key_word") == 0)
    {
        DeLOG("\tIdentified keyword\n");
        token = strtok(NULL, "=");

        if (token == NULL)
        {
            DeLOG("Error: Oops. It looks like the grammar module is expecting a syntax different from the annotated output of the lexer\n");
            return std::make_pair("invalid", "");
        }

        endOfContentDelimeter = ">";
    }

    std::string lastToken;
    std::string secondLastToken;

    token = strtok(NULL, endOfContentDelimeter.c_str());

    //check to see if there are any more '}'s. Because we allow '{'
    //within the data part, this case needs to be covered
    if (token == NULL)
    {
        DeLOG("Error: Oops. It looks like the grammar module is expecting a syntax different from the annotated output of the lexer\n");
        return std::make_pair("invalid", "");
    }

    if (keywordName.compare("key_word") == 0 && strlen(token) > 1)
    {
        //This takes care of the leading '<' immediately after the '=' sign in annotated data.
        token = (token+1);
    }

    size_t count = 0;
    do
    {
        content.append(secondLastToken);
        if (keywordName.compare("key_word") == 0 && count > 1 && strcmp("}",token) != 0)
        {
            content.append(endOfContentDelimeter);
        }

        secondLastToken = lastToken;
        lastToken = std::string(token);

        token = strtok(NULL, endOfContentDelimeter.c_str());

        count++;
    } while(token != NULL); 

    if (lastToken.compare("}") == 0)
    {
        if (keywordName.compare("key_word") == 0 && count > 1)
        {
            content.append(endOfContentDelimeter);
        }
    }

    content.append(secondLastToken);

    //the last Token should be the '}'
    if (secondLastToken.empty())
    {
        content.append(lastToken);
    }
    else if (lastToken.compare("}") != 0)
    {
        DeLOG("Warning: The last character in annotated data is expected to be '}'. It appears not to be (grammar module); you should deal with this.\n");
    }

    return std::make_pair(keywordName, content);
}
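
Below is a minimal sketch of the same name/content split done with std::string operations instead of strtok() and a raw buffer. The "{name:content}" shape is inferred from the parsing above and should be treated as an assumption rather than a documented format; the helper name is illustrative.

#include <string>
#include <utility>

std::pair<std::string, std::string> splitAnnotatedData(const std::string& annotatedData)
{
    const std::size_t colon = annotatedData.find(':');
    if (annotatedData.size() < 2 || annotatedData.front() != '{' || colon == std::string::npos)
    {
        return std::make_pair("invalid", "");
    }

    //the keyword name sits between the leading '{' and the first ':'
    std::string keywordName = annotatedData.substr(1, colon - 1);

    //the content runs from just after the ':' up to the trailing '}'
    std::size_t end = annotatedData.rfind('}');
    if (end == std::string::npos || end <= colon)
    {
        end = annotatedData.size();
    }
    std::string content = annotatedData.substr(colon + 1, end - colon - 1);

    return std::make_pair(keywordName, content);
}
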
Example 7
PhasedExecution::PhasedExecution() : _contextManager(nullptr)
{
    DeLOG("PhasedExecution::PhasedExecution()\n");
}
Example 8
//merges dfas to one dfa for traversal
lexer_word_repr* lexer_dfa_builder::mergeDfas(const std::vector<lexer_word_repr*>* const words, DfaManager& dfaManager) const
{
    lexer_word_repr* start = dfaManager.createLexerWordRepr();

    //So each JOB consists of:
    //  1) a ptr to the dfa node where we left off in mergeToWord
    //  2) a transition that we suspect is placeable in mergeToWord
    //  3) a vector of dfa nodes already visited in mergeTo; this prevents folding the fromDfa back into "itself".
    //      In reality there should be a path distinguishing mergeFrom from mergeTo that doesn't visit
    //      a node in mergeTo twice. The "going back into itself" behaviour is strictly for
    //      Kleene-closure-like behaviour at runtime; the minimal "description" of the automaton is constant
    //      and doesn't need to repeat any node.
    auto jobQueue = new std::vector<std::vector<std::tuple<lexer_dfa*, LexerTransition, std::vector<lexer_dfa*>>>*>();

    for (size_t i = 0; i < words->size(); i++)
    {
        auto jobVector = new std::vector<std::tuple<lexer_dfa*, LexerTransition, std::vector<lexer_dfa*>>>();
        jobQueue->push_back(jobVector);
    }

    std::cout << std::endl << "Merging dfas to one, words size: " << words->size() << std::endl;

    //iterate through words
    size_t jobLineIndex = 0;
    for (size_t i = 0; i < words->size(); i++)
    {
        lexer_dfa* word = words->at(i);
        std::cout << word << std::endl;
        std::cout << word->getId() << std::endl;

        lexer_dfa* mergeToDfaPtr = start;
        lexer_dfa* mergeFromDfaPtr = word;

        std::vector<LexerTransition> nextTransitions = mergeFromDfaPtr->getTransitions();

        DeLOG("Getting transitions for word\n");

        for (auto transition : nextTransitions)
        {
            DeLOG("Adding job to job Queue\n");

            std::tuple<lexer_dfa*, LexerTransition, std::vector<lexer_dfa*>> job(mergeToDfaPtr, transition, std::vector<lexer_dfa*>{});
            (jobQueue->at(jobLineIndex))->push_back(job);
        }

        jobLineIndex++;
    }

    DeLOG(std::string("\nMerge Process: ").append(std::to_string(jobQueue->size())).append(" job lines(queues) total\n").c_str());

    for (jobLineIndex = 0; jobLineIndex < jobQueue->size(); jobLineIndex++)
    {
        DeLOG(std::string("Processing Job line #").append(std::to_string(jobLineIndex+1)).append("\n").c_str());
        auto jobVector = jobQueue->at(jobLineIndex);
        while (jobVector->size() != 0)
        {
            DeLOG(std::string("\nThere are ").append(std::to_string(jobVector->size())).append(" jobs in the job vector. Processing job line #").append(std::to_string(jobLineIndex + 1)).append("\n").c_str());

            auto currJobTuple = jobVector->back();
            jobVector->pop_back();

            auto currMergeToDfaPtr = std::get<0>(currJobTuple);

            //here we'll check currMergeToDfaPtr against the previously visited ptrs in mergeTo;
            //if we've already visited it, we know to ignore this transition (and NOT to put the job back in the queue)
            auto ptrsInMergeToAlreadyVisited = std::get<2>(currJobTuple);
            auto skipAndContinue = false;
            for (auto mergeToDfaPtrVisited : ptrsInMergeToAlreadyVisited)
            {
                if (currMergeToDfaPtr == mergeToDfaPtrVisited)
                {
                    skipAndContinue = true;
                    break;
                }
            }

            if (skipAndContinue)
            {
                continue;
            }

            ptrsInMergeToAlreadyVisited.push_back(currMergeToDfaPtr);

            auto transitionFromCurrMergeFromDfaPtr = std::get<1>(currJobTuple);            
            auto nextMergeFromDfaPtr = transitionFromCurrMergeFromDfaPtr.getDfaNode();
            const auto si = transitionFromCurrMergeFromDfaPtr.getStateAndInput();

            DeLOG(std::string{"si = {"}.append(std::to_string(si.getState())).append(1, si.getInput()).append("}\n").c_str());

            auto nextMergeToDfaPtrCandidateInfo1 = currMergeToDfaPtr->getNextDfaForInput(si.getInput(), false); 
            auto nextMergeToDfaPtrCandidateProperties1 = nextMergeToDfaPtrCandidateInfo1.second;

            auto nextMergeToDfaPtrCandidateInfo2 = currMergeToDfaPtr->getNextDfaForInput(si.getInput(), true);
            auto nextMergeToDfaPtrCandidateProperties2 = nextMergeToDfaPtrCandidateInfo2.second;
            
            //aka mergeFromDfaProperties
            const auto currentMergeFromTransitionProperties = transitionFromCurrMergeFromDfaPtr.getProperties();

            if (checkForProperty(currentMergeFromTransitionProperties, Lexer_Dfa_Properties::ISA_PUSH_DOWN_CONTINUANCE)
                 || checkForProperty(currentMergeFromTransitionProperties, Lexer_Dfa_Properties::ISA_PUSH_DOWN_EJECT))
            {
              //If the transition ~to~ the current ~mergeFrom~ dfa has the
              // properties PUSH_DOWN_CONTINUANCE or PUSH_DOWN_EJECT,
              // then we assume we are in a state of recursion. If the
              // recursive pathway in the ~mergeTo~ dfa is not taken
              // (nextMergeToDfaPtrCandidateProperties2 is 0x0), then we
              // add the current ~mergeFrom~ dfa to currMergeToDfa, noting
              // that it is for a recursion (stack-count > 0).
              // If the position is filled (properties != 0x0 in the dfaInfo
              // returned when we query with the param indicating
              // stackCount > 0), then we push jobs for the ~nextMergeTo~
              // dfa into the jobQueue, carrying the transition's
              // properties along.

              lexer_dfa* nextMergeToDfaPtr = nullptr;
              if (nextMergeToDfaPtrCandidateProperties2 != 0x0)
              { 
                nextMergeToDfaPtr = nextMergeToDfaPtrCandidateInfo2.first;
              }

              currMergeToDfaPtr->_printTransitions();
              std::cout << "is there nextDfaPtr? " << (nextMergeToDfaPtr != nullptr ? "yes" : "no")
                      << " " << nextMergeToDfaPtr << std::endl;

              if (nextMergeToDfaPtr == nullptr)
              {
                LexerStateAndInput aLexerStateAndInput = transitionFromCurrMergeFromDfaPtr.getStateAndInput();

                DeLOG(std::string{"::adding transition(["}.append(std::to_string(aLexerStateAndInput.getState())).append(", '").append(1, aLexerStateAndInput.getInput()).append("']->").append(std::to_string(nextMergeFromDfaPtr->getId())).append(") to dfa(").append(std::to_string(currMergeToDfaPtr->getId())).append(")\n").c_str());

                StateAndInput<int,char> aStateAndInput(aLexerStateAndInput.getState(), aLexerStateAndInput.getInput(), transitionFromCurrMergeFromDfaPtr.getIsRanged());
                currMergeToDfaPtr->add_next_dfa(aStateAndInput, nextMergeFromDfaPtr, currentMergeFromTransitionProperties);

                //Sanity check
                auto nextMergeToDfaPtrCandidateInfoSanity = currMergeToDfaPtr->getNextDfaForInput(si.getInput(), true);
                auto nextMergeToDfaPtrCandidatePropertiesSanity = nextMergeToDfaPtrCandidateInfoSanity.second;

                if (nextMergeToDfaPtrCandidateInfoSanity.first == nullptr)
                {
                  perror("\nCould not find Node. Exiting.\n");
                  exit(EXIT_FAILURE);
                }

              }
              else
              {
                std::vector<LexerTransition> nextTransitions = nextMergeFromDfaPtr->getTransitions();

                for (auto transitionFromNextMergeFromDfa : nextTransitions)
                {
                  const auto si = transitionFromNextMergeFromDfa.getStateAndInput();
                  DeLOG(std::string{"Couldn't find an opening, pushing back job to add ("}.append(std::to_string(si.getState())).append(",").append(1, si.getInput()).append(") from dfa-id(").append(std::to_string(nextMergeToDfaPtr->getId())).append(")\n").c_str());

                  std::tuple<lexer_dfa*, LexerTransition, std::vector<lexer_dfa*>> job(const_cast<lexer_dfa*>(nextMergeToDfaPtr), transitionFromNextMergeFromDfa, ptrsInMergeToAlreadyVisited);

                  jobVector->push_back(job);
                }                
              }
            }
            else if (checkForProperty(currentMergeFromTransitionProperties, Lexer_Dfa_Properties::ISA_NORMAL) 
              || checkForProperty(currentMergeFromTransitionProperties, Lexer_Dfa_Properties::ISA_PUSH_DOWN_ACTIVATOR))
            {
              //Likewise, if the transition ~to~ the current ~mergeFrom~ dfa
              // has the properties NORMAL or PUSH_DOWN_ACTIVATOR,
              // then we assume a non-recursive step (even if, from the
              // top-level lexer's perspective, this has yet to be
              // determined). If the non-recursive pathway in the ~mergeTo~
              // dfa is not taken, then we add the current ~mergeFrom~ dfa
              // to the ~mergeTo~ dfa, noting that it is for a non-recursive
              // step (stack-count == 0).
              // If the position is filled (a non-nullptr dfa results from
              // the query with the stackCount == 0 param), then we push
              // jobs for the ~nextMergeTo~ dfa into the jobQueue.
              lexer_dfa* nextMergeToDfaPtr = nullptr;

              if (nextMergeToDfaPtrCandidateProperties1 != 0x0)
              {
                 nextMergeToDfaPtr = nextMergeToDfaPtrCandidateInfo1.first;
              }

              currMergeToDfaPtr->_printTransitions();
              std::cout << "is there nextDfaPtr? " << (nextMergeToDfaPtr != nullptr ? "yes" : "no")
                        << " " << nextMergeToDfaPtr << std::endl;

              if (nextMergeToDfaPtr == nullptr)
              {
                LexerStateAndInput aLexerStateAndInput = transitionFromCurrMergeFromDfaPtr.getStateAndInput();

                DeLOG(std::string{"::adding transition(["}.append(std::to_string(aLexerStateAndInput.getState())).append(", '").append(1, aLexerStateAndInput.getInput()).append("']->").append(std::to_string(nextMergeFromDfaPtr->getId())).append(") to dfa(").append(std::to_string(currMergeToDfaPtr->getId())).append(")\n").c_str());

                StateAndInput<int,char> aStateAndInput(aLexerStateAndInput.getState(), aLexerStateAndInput.getInput(), transitionFromCurrMergeFromDfaPtr.getIsRanged());
                currMergeToDfaPtr->add_next_dfa(aStateAndInput, nextMergeFromDfaPtr, currentMergeFromTransitionProperties);                

                //Sanity check
                auto nextMergeToDfaPtrCandidateInfoSanity = currMergeToDfaPtr->getNextDfaForInput(si.getInput(), false);
                auto nextMergeToDfaPtrCandidatePropertiesSanity = nextMergeToDfaPtrCandidateInfoSanity.second;

                if (nextMergeToDfaPtrCandidateInfoSanity.first == nullptr)
                {
                  perror("\nYeah, this is bad. After we just added our new transition to merged rep, we can't query for it. The effect of adding a new transition should be immediate (I don't know why it should ever not be...). Exiting.\n");
                  exit(EXIT_FAILURE);
                }
              }
              else
              {
                std::vector<LexerTransition> nextTransitions = nextMergeFromDfaPtr->getTransitions();

                for (auto transitionFromNextMergeFromDfa : nextTransitions)
                {
                  const auto si = transitionFromNextMergeFromDfa.getStateAndInput();
                  DeLOG(std::string{"Couldn't find an opening, pushing back job to add ("}.append(std::to_string(si.getState())).append(",").append(1, si.getInput()).append(") from dfa-id(").append(std::to_string(nextMergeToDfaPtr->getId())).append(")\n").c_str());

                  std::tuple<lexer_dfa*, LexerTransition, std::vector<lexer_dfa*>> job(const_cast<lexer_dfa*>(nextMergeToDfaPtr), transitionFromNextMergeFromDfa, ptrsInMergeToAlreadyVisited);

                  jobVector->push_back(job);
                }
              }
            }
            else
            {
              std::cout << "Undefined language specification: duplicate lexer words?" << std::endl;
              exit(1);
            }

        }

        delete jobVector;
    }

    delete jobQueue;

    std::cout << "Finished jobs!" << std::endl << std::endl;

    return start;
}
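
mergeDfas() is organized as a worklist: each job records where we are in the merged graph, the transition we are trying to fold in, and the merged-graph nodes already visited on this path. Below is a stripped-down, self-contained sketch of that pattern; Node and Edge are illustrative stand-ins for lexer_dfa and LexerTransition, and the push-down property handling is deliberately omitted.

#include <tuple>
#include <vector>

struct Node;

struct Edge
{
    char  input;
    Node* target;
};

struct Node
{
    int id;
    std::vector<Edge> edges;

    Node* findNext(char input) const
    {
        for (const Edge& e : edges)
            if (e.input == input) return e.target;
        return nullptr;
    }
};

//A job: position in the merged graph, the edge to fold in, and the
//merged-graph nodes already visited on this path.
using Job = std::tuple<Node*, Edge, std::vector<Node*>>;

void mergeInto(Node* mergeTo, Node* mergeFrom)
{
    std::vector<Job> jobs;
    for (const Edge& e : mergeFrom->edges)
        jobs.emplace_back(mergeTo, e, std::vector<Node*>{});

    while (!jobs.empty())
    {
        Job job = jobs.back();
        jobs.pop_back();

        Node* current = std::get<0>(job);
        const Edge& edge = std::get<1>(job);
        std::vector<Node*>& visited = std::get<2>(job);

        //drop the job if this merged-graph node was already handled on this path
        bool seen = false;
        for (Node* v : visited)
            if (v == current) { seen = true; break; }
        if (seen) continue;
        visited.push_back(current);

        if (Node* next = current->findNext(edge.input))
        {
            //the slot is already taken: descend and enqueue the source's outgoing edges
            for (const Edge& e : edge.target->edges)
                jobs.emplace_back(next, e, visited);
        }
        else
        {
            //the slot is free: graft the source's edge (and its subgraph) here
            current->edges.push_back(edge);
        }
    }
}
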
Example 9
const lexer_dfa* lexer_dfa::getNextDfa(const LexerStateAndInput& lexerStateAndInput) const
{
    const StateAndInput<int,char> stateAndInput(lexerStateAndInput.getState(), 
        lexerStateAndInput.getInput(), false);

    DONT _printInputHash(stateAndInput, "stateAndInput");
    DeLOG(std::string{"\t_nextStates::size = "}.append(std::to_string(_nextStates.size())).append("\n").c_str());
    DONT _printTransitions();

    lexer_dfa* ret;

    std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetched
        = _nextStates.find(stateAndInput);

    //if we can't find anything, it may be possible we encountered
    //the special "ranged" stateAndInput - which is guaranteed to be mapped
    //to a unique index in the hashmap (it's in the hash formula)
    if (fetched == _nextStates.end())
    {
        ret = nullptr;

        char input = stateAndInput.getInput();
        if (input == '0')
        {
            StateAndInput<int,char> rangedInput(stateAndInput.getState(), SI_NUMBERS_0, true);

            DONT std::cout << "\t\t";
            DONT _printInputHash(rangedInput, "rangedInput");

            std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedNumbers0
                = _nextStates.find(rangedInput);

            if (fetchedNumbers0 != _nextStates.end())
            {
                DONT std::cout << "\tFound rangedNumber! (0)" << std::endl;
                ret = fetchedNumbers0->second;
            }
            else
            {
                StateAndInput<int,char> rangedInput2(stateAndInput.getState(), SI_NUMBERS_0to9, true);
                DONT std::cout << "\t\t";
                DONT _printInputHash(rangedInput2, "SI_NUMBERS_0to9"); 

                std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedNumbers0to9
                    = _nextStates.find(rangedInput2);

                if (fetchedNumbers0to9 != _nextStates.end())
                {
                    DONT std::cout << "\trangedNumber:[0-9]" << std::endl; 
                    ret = fetchedNumbers0to9->second;
                }
            }
        }
        else if (input >= '1' && input <= '9')
        {
            StateAndInput<int,char> rangedInput0to9(stateAndInput.getState(), SI_NUMBERS_0to9, true);

            DONT std::cout << "\t\t"; 
            DONT _printInputHash(rangedInput0to9, "rangedInputNumbers0to9");

            std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedNumbers0to9
                = _nextStates.find(rangedInput0to9);

            if (fetchedNumbers0to9 != _nextStates.end())
            {
                DONT std::cout << "\tFound rangedNumber! ([0-9])" << std::endl; 
                ret = fetchedNumbers0to9->second;
            }
            else
            {
                StateAndInput<int,char> rangedInput1to9(stateAndInput.getState(), SI_NUMBERS_1to9, true);
                DONT std::cout << "\t\t"; 
                DONT _printInputHash(rangedInput1to9, "rangedInputNumbers1to9");

                std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedNumbers1to9
                    = _nextStates.find(rangedInput1to9);

                if (fetchedNumbers1to9 != _nextStates.end())
                {
                    DONT std::cout << "\trangedNumber:[1-9]" << std::endl; 
                    ret = fetchedNumbers1to9->second;
                }
            }
        }   
        else if (input >= 'a' && input <= 'z')
        {
            DONT std::cout << "\tChecking lowercase ranged" << std::endl;

            StateAndInput<int,char> rangedInput(stateAndInput.getState(), SI_CHARS_LOWER, true);

            DONT std::cout << "\t\t";
            DONT _printInputHash(rangedInput, "rangedInput"); 

            DONT std::cout << "\t\tlexer_dfa::getNextState(...): (state,input) = (" << stateAndInput.getState() << ", SI_CHARS_LOWER)" << std::endl;

            std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedCharsLower
                = _nextStates.find(rangedInput);

            if (fetchedCharsLower != _nextStates.end())
            {
                DONT std::cout << "\tFound rangedChars:[a-z]" << std::endl; 
                ret = fetchedCharsLower->second;
            }
            else
            {
                StateAndInput<int,char> rangedInput2(stateAndInput.getState(), SI_CHARS_ANY, true);

                DONT std::cout << "\t\t"; 
                DONT _printInputHash(rangedInput2, "rangedInput2"); 

                DONT std::cout << "\t\tlexer_dfa::getNextState(...): (state,input) = (" << stateAndInput.getState() << ", SI_CHARS_ANY)" << std::endl; 

                std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedCharsAny
                    = _nextStates.find(rangedInput2);

                if (fetchedCharsAny != _nextStates.end())
                {
                    DONT std::cout << "rangedChars:([a-z]|[A-Z])" << std::endl;
                    ret = fetchedCharsAny->second;
                }

            }   
        }
        else if (input >= 'A' && input <= 'Z')
        {
            DONT std::cout << "\tChecking uppercase ranged" << std::endl; 

            StateAndInput<int,char> rangedInput(stateAndInput.getState(), SI_CHARS_UPPER, true);

            DONT std::cout << "\t\t";
            DONT _printInputHash(rangedInput, "rangedInput"); 

            DONT std::cout << "\t\tlexer_dfa::getNextState(...): (state,input) = (" << stateAndInput.getState() << ", SI_CHARS_UPPER)" << std::endl;

            std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedCharsUpper
                = _nextStates.find(rangedInput);

            if (fetchedCharsUpper != _nextStates.end())
            {
                //std::cout << "\tFound rangedChars:[A-Z]" << std::endl; //commented in order to benchmark diff between ScanWords
                ret = fetchedCharsUpper->second;
            }
            else
            {
                StateAndInput<int,char> rangedInput2(stateAndInput.getState(), SI_CHARS_ANY, true);

                DONT std::cout << "\t\t";
                DONT _printInputHash(rangedInput2, "rangedInput2");

                DONT std::cout << "\t\tlexer_dfa::getNextState(...): (state,input) = (" << stateAndInput.getState() << ", SI_CHARS_ANY)" << std::endl;

                std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedCharsAny
                    = _nextStates.find(rangedInput2);

                if (fetchedCharsAny != _nextStates.end())
                {
                    DONT std::cout << "rangedChars:([a-z]|[A-Z])" << std::endl;
                    ret = fetchedCharsAny->second;
                }
            }
        }

        //if by now ret has not been set to something other than nullptr, we have one last resort in the empty char
        if (ret == nullptr)
        {
            //we check for 'anythingBut' before we finally check for empty string -- this is the going protocol for now
            if (_anythingButTransition != nullptr)
            {
                if (_anythingButTransition->getIsRanged())
                {
                    //todo: perform range checks for anything buts
                }
                else if (input != _anythingButTransition->getStateAndInput().getInput())
                {
                   ret = const_cast<lexer_dfa*>(_anythingButTransition->getDfaNode());
                }
            }

            if (ret == nullptr)
            {
                //check case of empty char
                StateAndInput<int,char> stateAndEmptyCharInput(stateAndInput.getState(), '\0');

                DONT std::cout << "\t\t";
                DONT _printInputHash(stateAndEmptyCharInput, "stateAndEmptyInput"); 

                std::unordered_map<StateAndInput<int,char>, lexer_dfa*, StateAndInputHashFunction, StateAndInputEquals>::const_iterator fetchedEmptyChar
                    = _nextStates.find(stateAndEmptyCharInput);                

                if (fetchedEmptyChar != _nextStates.end())
                {
                    DONT std::cout << "\tfound empty char!!!" << std::endl;
                    ret = fetchedEmptyChar->second;
                }
                else
                {
                    DONT std::cout << "\tkey not found" << std::endl;
                    ret = nullptr;
                }
            }
        }
    }
    else
    {
        ret = fetched->second;
    }

    return ret;
}
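
getNextDfa() walks a fixed fallback chain: an exact (state, input) key first, then the ranged categories the concrete character can belong to, then the 'anythingBut' transition, and finally the empty-char transition. The sketch below captures that chain in simplified form; the Key, the negative category tags, and the NodeStub type are stand-ins for the project's StateAndInput / SI_* machinery, not its real representation.

#include <cctype>
#include <cstddef>
#include <functional>
#include <unordered_map>
#include <utility>
#include <vector>

enum class Category : int { Numbers0 = 1, Numbers1to9, Numbers0to9, Lower, Upper, AnyAlpha };

struct NodeStub {}; //stand-in for lexer_dfa

//(state, input) key; ranged categories are stored as negative tags so they
//can never collide with a concrete character value.
using Key = std::pair<int, int>;

struct KeyHash
{
    std::size_t operator()(const Key& k) const
    {
        return std::hash<int>{}(k.first) * 31u + std::hash<int>{}(k.second);
    }
};

using TransitionMap = std::unordered_map<Key, const NodeStub*, KeyHash>;

//The ranged categories worth trying for a concrete character, in the same
//order the lookup above tries them.
static std::vector<Category> categoriesFor(char c)
{
    if (c == '0')                                     return {Category::Numbers0, Category::Numbers0to9};
    if (c >= '1' && c <= '9')                         return {Category::Numbers0to9, Category::Numbers1to9};
    if (std::islower(static_cast<unsigned char>(c)))  return {Category::Lower, Category::AnyAlpha};
    if (std::isupper(static_cast<unsigned char>(c)))  return {Category::Upper, Category::AnyAlpha};
    return {};
}

const NodeStub* nextNode(const TransitionMap& nextStates, int state, char input)
{
    //1) exact (state, input) match
    auto it = nextStates.find({state, input});
    if (it != nextStates.end()) return it->second;

    //2) ranged categories the character falls into
    for (Category category : categoriesFor(input))
    {
        it = nextStates.find({state, -static_cast<int>(category)});
        if (it != nextStates.end()) return it->second;
    }

    //3) an 'anythingBut' transition would be consulted here (omitted in this sketch)

    //4) last resort: the empty-char transition
    it = nextStates.find({state, '\0'});
    return it != nextStates.end() ? it->second : nullptr;
}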