//----------------------------------------------------------------------------- // // RBBITableBuilder::build - This is the main function for building the DFA state transtion // table from the RBBI rules parse tree. // //----------------------------------------------------------------------------- void RBBITableBuilder::build() { if (U_FAILURE(*fStatus)) { return; } // If there were no rules, just return. This situation can easily arise // for the reverse rules. if (fTree==NULL) { return; } // // Walk through the tree, replacing any references to $variables with a copy of the // parse tree for the substition expression. // fTree = fTree->flattenVariables(); #ifdef RBBI_DEBUG if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) { RBBIDebugPuts("Parse tree after flattening variable references."); fTree->printTree(TRUE); } #endif // // If the rules contained any references to {bof} // add a {bof} <cat> <former root of tree> to the // tree. Means that all matches must start out with the // {bof} fake character. // if (fRB->fSetBuilder->sawBOF()) { RBBINode *bofTop = new RBBINode(RBBINode::opCat); RBBINode *bofLeaf = new RBBINode(RBBINode::leafChar); // Delete and exit if memory allocation failed. if (bofTop == NULL || bofLeaf == NULL) { *fStatus = U_MEMORY_ALLOCATION_ERROR; delete bofTop; delete bofLeaf; return; } bofTop->fLeftChild = bofLeaf; bofTop->fRightChild = fTree; bofLeaf->fParent = bofTop; bofLeaf->fVal = 2; // Reserved value for {bof}. fTree = bofTop; } // // Add a unique right-end marker to the expression. // Appears as a cat-node, left child being the original tree, // right child being the end marker. // RBBINode *cn = new RBBINode(RBBINode::opCat); // Exit if memory allocation failed. if (cn == NULL) { *fStatus = U_MEMORY_ALLOCATION_ERROR; return; } cn->fLeftChild = fTree; fTree->fParent = cn; cn->fRightChild = new RBBINode(RBBINode::endMark); // Delete and exit if memory allocation failed. if (cn->fRightChild == NULL) { *fStatus = U_MEMORY_ALLOCATION_ERROR; delete cn; return; } cn->fRightChild->fParent = cn; fTree = cn; // // Replace all references to UnicodeSets with the tree for the equivalent // expression. // fTree->flattenSets(); #ifdef RBBI_DEBUG if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) { RBBIDebugPuts("Parse tree after flattening Unicode Set references."); fTree->printTree(TRUE); } #endif // // calculate the functions nullable, firstpos, lastpos and followpos on // nodes in the parse tree. // See the alogrithm description in Aho. // Understanding how this works by looking at the code alone will be // nearly impossible. // calcNullable(fTree); calcFirstPos(fTree); calcLastPos(fTree); calcFollowPos(fTree); if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) { RBBIDebugPuts("\n"); printPosSets(fTree); } // // For "chained" rules, modify the followPos sets // if (fRB->fChainRules) { calcChainedFollowPos(fTree); } // // BOF (start of input) test fixup. // if (fRB->fSetBuilder->sawBOF()) { bofFixup(); } // // Build the DFA state transition tables. // buildStateTable(); flagAcceptingStates(); flagLookAheadStates(); flagTaggedStates(); // // Update the global table of rule status {tag} values // The rule builder has a global vector of status values that are common // for all tables. Merge the ones from this table into the global set. // mergeRuleStatusVals(); if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "states")) {printStates();}; }
//----------------------------------------------------------------------------- // // RBBITableBuilder::build - This is the main function for building the DFA state transtion // table from the RBBI rules parse tree. // //----------------------------------------------------------------------------- void RBBITableBuilder::build() { if (U_FAILURE(*fStatus)) { return; } // If there were no rules, just return. This situation can easily arise // for the reverse rules. if (fTree==NULL) { return; } // // Walk through the tree, replacing any references to $variables with a copy of the // parse tree for the substition expression. // fTree = fTree->flattenVariables(); if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) { RBBIDebugPrintf("Parse tree after flattening variable references.\n"); fTree->printTree(TRUE); } // // Add a unique right-end marker to the expression. // Appears as a cat-node, left child being the original tree, // right child being the end marker. // RBBINode *cn = new RBBINode(RBBINode::opCat); cn->fLeftChild = fTree; fTree->fParent = cn; cn->fRightChild = new RBBINode(RBBINode::endMark); cn->fRightChild->fParent = cn; fTree = cn; // // Replace all references to UnicodeSets with the tree for the equivalent // expression. // fTree->flattenSets(); if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) { RBBIDebugPrintf("Parse tree after flattening Unicode Set references.\n"); fTree->printTree(TRUE); } // // calculate the functions nullable, firstpos, lastpos and followpos on // nodes in the parse tree. // See the alogrithm description in Aho. // Understanding how this works by looking at the code alone will be // nearly impossible. // calcNullable(fTree); calcFirstPos(fTree); calcLastPos(fTree); calcFollowPos(fTree); if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) { RBBIDebugPrintf("\n\n"); printPosSets(fTree); } // // For "chained" rules, modify the followPos sets // if (fRB->fChainRules) { calcChainedFollowPos(fTree); } // // Build the DFA state transition tables. // buildStateTable(); flagAcceptingStates(); flagLookAheadStates(); flagTaggedStates(); // // Update the global table of rule status {tag} values // The rule builder has a global vector of status values that are common // for all tables. Merge the ones from this table into the global set. // mergeRuleStatusVals(); if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "states")) {printStates();}; }