Example #1
0
// Parses a sequence of tokens against the grammar in m_pGrammar, using
// one Column per token plus a sentinel column, and returns the node found
// by State::getResult() for the start nonterminal in the last column.
//
// Parameters:
//    nHandle      - passed through unchanged to every push() call
//    iStartNt     - index of the start (root) nonterminal; must be < 0
//    pnErrorToken - optional out parameter. Reset to 0 on entry. If the
//                   main loop stops early, it receives the index of the
//                   column that had neither states nor queued states; if
//                   all columns were processed but no result was found,
//                   it receives nTokens.
//    nTokens      - number of tokens to parse; 0 yields NULL
//    pnToklist    - optional array of nTokens token values; if NULL, the
//                   integers 0..nTokens-1 are used as token values
//
// Returns NULL if nTokens is 0, there is no grammar, iStartNt is not a
// valid nonterminal index, or no result was found. Otherwise returns a
// node on which addRef() has been called; the caller should call delRef()
// on it after use.
//
// NOTE(review): the H / Q / Q' / V naming and the use of makeNode() look
// like an SPPF-building Earley parser in the style of Scott (2008) -
// confirm against the project documentation.
Node* Parser::parse(UINT nHandle, INT iStartNt, UINT* pnErrorToken,
   UINT nTokens, const UINT pnToklist[])
{
   // If pnToklist is NULL, a sequence of integers 0..nTokens-1 will be used
   // Sanity checks
   if (pnErrorToken)
      *pnErrorToken = 0;
   if (!nTokens)
      return NULL;
   if (!this->m_pGrammar)
      return NULL;
   if (iStartNt >= 0)
      // Root must be nonterminal (index < 0)
      return NULL;
   Nonterminal* pRootNt = (*this->m_pGrammar)[iStartNt];
   if (!pRootNt)
      // No or invalid root nonterminal
      return NULL;

   // Initialize the Earley columns: one per token, plus a final sentinel
   // column, for a total of nTokens + 1 entries
   UINT i;
   Column** pCol = new Column* [nTokens + 1];
   for (i = 0; i < nTokens; i++)
      pCol[i] = new Column(this, pnToklist ? pnToklist[i] : i);
   // Here i == nTokens, so this fills the last slot of the array
   pCol[i] = new Column(this, (UINT)-1); // Sentinel column

   // Initialize parser state
   // pQ0 is the list handed to push() whenever a state is pushed towards
   // the column that will be processed next; it becomes pQ at the top of
   // each iteration of the main loop
   State* pQ0 = NULL;
   // Passed to the placement form of new for every State, and to
   // discardState() / freeStates()
   // NOTE(review): presumably the head of a chunked State allocator - confirm
   StateChunk* pChunkHead = NULL;

   // Prepare the initial state: one State for each production of the
   // root nonterminal, starting at column 0
   Production* p = pRootNt->getHead();
   while (p) {
      State* ps = new (pChunkHead) State(iStartNt, 0, p, 0, NULL);
      // If push() returns false the state was not taken over, so give it back
      if (!this->push(nHandle, ps, pCol[0], pQ0))
         discardState(pChunkHead, ps);
      p = p->getNext();
   }

   // Main parse loop
   // pQ is the list handed to push() while processing the current column;
   // it is consumed by the scanner step at the end of each iteration
   State* pQ = NULL;
   NodeDict ndV; // Node dictionary

/*
   clock_t clockStart = clock();
   clock_t clockLast = clockStart;
*/

   // Iterates over all nTokens + 1 columns, including the sentinel column
   for (i = 0; i < nTokens + 1; i++) {

      Column* pEi = pCol[i];
      State* pState = pEi->nextState();

      // printf("Column %u, token %u\n", i, pEi->getToken());

      if (!pState && !pQ0) {
         // No parse available at token i-1
         // (the column has no states and nothing is queued for it)
         if (pnErrorToken)
            *pnErrorToken = i;
         // Leaving the loop here keeps i <= nTokens, which the code after
         // the loop uses to tell an aborted parse from a completed one
         break;
      }

      // The states queued for this column become the current queue;
      // pQ0 starts collecting afresh for column i + 1
      pQ = pQ0;
      pQ0 = NULL;
      // H set for this column: (nonterminal, node) pairs recorded by the
      // completer below for states that start and end in column i
      HNode* pH = NULL;

      while (pState) {
         // A negative item is a nonterminal index (predictor case) and
         // a zero item triggers the completer; positive items get no
         // processing in this loop
         // NOTE(review): presumably positive items are terminals that
         // push() routes to the pQ list - confirm against push()
         INT iItem = pState->prodDot();
         INT iNtB = pState->getNt();
         UINT nStart = pState->getStart();
         Node* pW = pState->getNode();
         if (iItem < 0) {
            // Don't push the same nonterminal more than once to the same column
            if (pEi->markSeen(iItem)) {
               // Earley predictor
               // Push all right hand sides of this nonterminal
               p = (*this->m_pGrammar)[iItem]->getHead();
               while (p) {
                  State* psNew = new (pChunkHead) State(iItem, 0, p, i, NULL);
                  if (!this->push(nHandle, psNew, pEi, pQ))
                     discardState(pChunkHead, psNew);
                  p = p->getNext();
               }
            }
            // Add elements from the H set that refer to the
            // nonterminal iItem (nt_C)
            // For each match, a new state is derived from pState with the
            // node returned by makeNode() and pushed to the current column
            HNode* ph = pH;
            while (ph) {
               if (ph->getNt() == iItem) {
                  Node* pY = this->makeNode(pState, i, ph->getV(), ndV);
                  State* psNew = new (pChunkHead) State(pState, pY);
                  if (!this->push(nHandle, psNew, pEi, pQ))
                     discardState(pChunkHead, psNew);
               }
               ph = ph->getNext();
            }
         }
         else
         if (iItem == 0) {
            // Earley completer
            if (!pW) {
               // The state carries no node: look up or create a node
               // labeled with nonterminal iNtB spanning i..i
               Label label(iNtB, 0, NULL, i, i);
               pW = ndV.lookupOrAdd(label);
               pW->addFamily(pState->getProd(), NULL, NULL); // Epsilon production
            }
            if (nStart == i) {
               // The state starts in this same column: record it at the
               // front of the H set so that states processed later in
               // this column that expect iNtB can pick it up
               HNode* ph = new HNode(iNtB, pW);
               ph->setNext(pH);
               pH = ph;
            }
            // Walk the states that getNtHead() returns for iNtB in the
            // column where this state started, and push a new state
            // derived from each one to the current column
            State* psNt = pCol[nStart]->getNtHead(iNtB);
            while (psNt) {
               Node* pY = this->makeNode(psNt, i, pW, ndV);
               State* psNew = new (pChunkHead) State(psNt, pY);
               if (!this->push(nHandle, psNew, pEi, pQ))
                  discardState(pChunkHead, psNew);
               psNt = psNt->getNtNext();
            }
         }
         // Move to the next item on the agenda
         // (which may have been enlarged by the previous code)
         pState = pEi->nextState();
      }

      // Clean up the H set
      while (pH) {
         HNode* ph = pH->getNext();
         delete pH;
         pH = ph;
      }

      // Reset the node dictionary
      ndV.reset();
      // Node for the token of column i; only created if there are
      // queued states to scan
      Node* pV = NULL;

      if (pQ) {
         // Labeled with the column's token and the span i..i+1
         Label label(pEi->getToken(), 0, NULL, i, i + 1);
         pV = new Node(label); // Reference is deleted below
      }

      while (pQ) {
         // Earley scanner
         // Fetch the successor first, since pQ is modified and re-pushed below
         State* psNext = pQ->getNext();
         Node* pY = this->makeNode(pQ, i + 1, pV, ndV);
         // Instead of throwing away the old state and creating
         // a new almost identical one, re-use the old after
         // 'incrementing' it by moving the dot one step to the right
         pQ->increment(pY);
         // pCol has nTokens + 1 entries, so index i + 1 is only valid
         // while i < nTokens
         // NOTE(review): this relies on pQ being empty when the sentinel
         // column (i == nTokens) is processed, and is only checked where
         // ASSERT is active - confirm that push() never queues a state
         // for the sentinel token
         ASSERT(i + 1 <= nTokens);
         if (!this->push(nHandle, pQ, pCol[i + 1], pQ0))
            // Only the destructor is run here; discardState() is not used
            // NOTE(review): presumably because this state is not the most
            // recent allocation in its chunk - confirm against discardState()
            pQ->~State();
         pQ = psNext;
      }

      // Clean up reference to pV created above
      if (pV)
         pV->delRef();

/*
      clock_t clockNow = clock();
      clock_t clockElapsed = clockNow - clockStart;
      clock_t clockThis = clockNow - clockLast;
      clockLast = clockNow;
      printf ("Column %u finished in %.3f sec, elapsed %.3f sec\n", i,
         ((float)clockThis) / CLOCKS_PER_SEC, ((float)clockElapsed) / CLOCKS_PER_SEC);
      fflush(stdout);
*/
   }

/*
   clock_t clockNow = clock() - clockStart;
   printf("Parse loop finished, elapsed %.3f sec\n",
      ((float)clockNow) / CLOCKS_PER_SEC);
*/

   // Both lists are expected to be empty here, whether the loop ran to
   // completion or was left early
   ASSERT(pQ == NULL);
   ASSERT(pQ0 == NULL);

   Node* pResult = NULL;
   // i == nTokens + 1 only if the loop was not left through the break
   if (i > nTokens) {
      // Completed the token loop
      // Restart the enumeration of the last column, since its states
      // were already consumed by nextState() in the main loop
      pCol[nTokens]->resetEnum();
      State* ps = pCol[nTokens]->nextState();
      while (ps && !pResult) {
         // Look through the end states until we find one that spans the
         // entire parse tree and derives the starting nonterminal
         pResult = ps->getResult(iStartNt);
         if (pResult)
            // Save the result node from being deleted when the
            // column states are deleted
            pResult->addRef();
         ps = pCol[nTokens]->nextState();
      }
      if (!pResult && pnErrorToken)
         // No parse available at the last column
         *pnErrorToken = nTokens;
   }

/*
   clockNow = clock() - clockStart;
   printf("Result found, elapsed %.3f sec\n",
      ((float)clockNow) / CLOCKS_PER_SEC);
*/

   // Cleanup: delete all nTokens + 1 columns and the column array itself
   for (i = 0; i < nTokens + 1; i++)
      delete pCol[i];
   delete [] pCol;

   // Release everything that was allocated through pChunkHead
   freeStates(pChunkHead);

/*
   clockNow = clock() - clockStart;
   printf("Cleanup finished, elapsed %.3f sec\n",
      ((float)clockNow) / CLOCKS_PER_SEC);
*/

   return pResult; // The caller should call delRef() on this after using it
}