//----------------------------------------------------------------------------- // // calcLastPos. Impossible to explain succinctly. See Aho, section 3.9 // //----------------------------------------------------------------------------- void RBBITableBuilder::calcLastPos(RBBINode *n) { if (n == NULL) { return; } if (n->fType == RBBINode::leafChar || n->fType == RBBINode::endMark || n->fType == RBBINode::lookAhead || n->fType == RBBINode::tag) { // These are non-empty leaf node types. n->fLastPosSet->addElement(n, *fStatus); return; } // The node is not a leaf. // Calculate lastPos on its children. calcLastPos(n->fLeftChild); calcLastPos(n->fRightChild); // Apply functions from table 3.40 in Aho if (n->fType == RBBINode::opOr) { setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet); } else if (n->fType == RBBINode::opCat) { setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet); if (n->fRightChild->fNullable) { setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); } } else if (n->fType == RBBINode::opStar || n->fType == RBBINode::opQuestion || n->fType == RBBINode::opPlus) { setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); } }
//----------------------------------------------------------------------------- // // calcLastPos. Impossible to explain succinctly. See Aho, section 3.9 // //----------------------------------------------------------------------------- void RBBITableBuilder::calcLastPos(RBBINode * n) { if (n == NULL) { return; } if (n->fType == RBBINode::leafChar || n->fType == RBBINode::endMark || n->fType == RBBINode::lookAhead || n->fType == RBBINode::tag) { // These are non-empty leaf node types. // Note: In order to maintain the sort invariant on the set, // this function should only be called on a node whose set is // empty to start with. n->fLastPosSet->addElement(n, *fStatus); return; } // The node is not a leaf. // Calculate lastPos on its children. calcLastPos(n->fLeftChild); calcLastPos(n->fRightChild); // Apply functions from table 3.40 in Aho if (n->fType == RBBINode::opOr) { setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet); } else if (n->fType == RBBINode::opCat) { setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet); if (n->fRightChild->fNullable) { setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); } } else if (n->fType == RBBINode::opStar || n->fType == RBBINode::opQuestion || n->fType == RBBINode::opPlus) { setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); } }
//----------------------------------------------------------------------------- // // RBBITableBuilder::build - This is the main function for building the DFA state transtion // table from the RBBI rules parse tree. // //----------------------------------------------------------------------------- void RBBITableBuilder::build() { if (U_FAILURE(*fStatus)) { return; } // If there were no rules, just return. This situation can easily arise // for the reverse rules. if (fTree==NULL) { return; } // // Walk through the tree, replacing any references to $variables with a copy of the // parse tree for the substition expression. // fTree = fTree->flattenVariables(); #ifdef RBBI_DEBUG if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) { RBBIDebugPuts("Parse tree after flattening variable references."); fTree->printTree(TRUE); } #endif // // If the rules contained any references to {bof} // add a {bof} <cat> <former root of tree> to the // tree. Means that all matches must start out with the // {bof} fake character. // if (fRB->fSetBuilder->sawBOF()) { RBBINode *bofTop = new RBBINode(RBBINode::opCat); RBBINode *bofLeaf = new RBBINode(RBBINode::leafChar); bofTop->fLeftChild = bofLeaf; bofTop->fRightChild = fTree; bofLeaf->fParent = bofTop; bofLeaf->fVal = 2; // Reserved value for {bof}. fTree = bofTop; } // // Add a unique right-end marker to the expression. // Appears as a cat-node, left child being the original tree, // right child being the end marker. // RBBINode *cn = new RBBINode(RBBINode::opCat); cn->fLeftChild = fTree; fTree->fParent = cn; cn->fRightChild = new RBBINode(RBBINode::endMark); cn->fRightChild->fParent = cn; fTree = cn; // // Replace all references to UnicodeSets with the tree for the equivalent // expression. // fTree->flattenSets(); #ifdef RBBI_DEBUG if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) { RBBIDebugPuts("Parse tree after flattening Unicode Set references."); fTree->printTree(TRUE); } #endif // // calculate the functions nullable, firstpos, lastpos and followpos on // nodes in the parse tree. // See the alogrithm description in Aho. // Understanding how this works by looking at the code alone will be // nearly impossible. // calcNullable(fTree); calcFirstPos(fTree); calcLastPos(fTree); calcFollowPos(fTree); if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) { RBBIDebugPuts("\n"); printPosSets(fTree); } // // For "chained" rules, modify the followPos sets // if (fRB->fChainRules) { calcChainedFollowPos(fTree); } // // BOF (start of input) test fixup. // if (fRB->fSetBuilder->sawBOF()) { bofFixup(); } // // Build the DFA state transition tables. // buildStateTable(); flagAcceptingStates(); flagLookAheadStates(); flagTaggedStates(); // // Update the global table of rule status {tag} values // The rule builder has a global vector of status values that are common // for all tables. Merge the ones from this table into the global set. // mergeRuleStatusVals(); if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "states")) {printStates();}; }