Beispiel #1
0
//-----------------------------------------------------------------------------
//
//   RBBITableBuilder::build  -  This is the main function for building the DFA state transtion
//                               table from the RBBI rules parse tree.
//
//-----------------------------------------------------------------------------
void  RBBITableBuilder::build() {

    if (U_FAILURE(*fStatus)) {
        return;
    }

    // If there were no rules, just return.  This situation can easily arise
    //   for the reverse rules.
    if (fTree==NULL) {
        return;
    }

    //
    // Walk through the tree, replacing any references to $variables with a copy of the
    //   parse tree for the substition expression.
    //
    fTree = fTree->flattenVariables();
#ifdef RBBI_DEBUG
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) {
        RBBIDebugPuts("Parse tree after flattening variable references.");
        fTree->printTree(TRUE);
    }
#endif

    //
    // If the rules contained any references to {bof} 
    //   add a {bof} <cat> <former root of tree> to the
    //   tree.  Means that all matches must start out with the 
    //   {bof} fake character.
    // 
    if (fRB->fSetBuilder->sawBOF()) {
        RBBINode *bofTop    = new RBBINode(RBBINode::opCat);
        RBBINode *bofLeaf   = new RBBINode(RBBINode::leafChar);
        // Delete and exit if memory allocation failed.
        if (bofTop == NULL || bofLeaf == NULL) {
            *fStatus = U_MEMORY_ALLOCATION_ERROR;
            delete bofTop;
            delete bofLeaf;
            return;
        }
        bofTop->fLeftChild  = bofLeaf;
        bofTop->fRightChild = fTree;
        bofLeaf->fParent    = bofTop;
        bofLeaf->fVal       = 2;      // Reserved value for {bof}.
        fTree               = bofTop;
    }

    //
    // Add a unique right-end marker to the expression.
    //   Appears as a cat-node, left child being the original tree,
    //   right child being the end marker.
    //
    RBBINode *cn = new RBBINode(RBBINode::opCat);
    // Exit if memory allocation failed.
    if (cn == NULL) {
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    cn->fLeftChild = fTree;
    fTree->fParent = cn;
    cn->fRightChild = new RBBINode(RBBINode::endMark);
    // Delete and exit if memory allocation failed.
    if (cn->fRightChild == NULL) {
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
        delete cn;
        return;
    }
    cn->fRightChild->fParent = cn;
    fTree = cn;

    //
    //  Replace all references to UnicodeSets with the tree for the equivalent
    //      expression.
    //
    fTree->flattenSets();
#ifdef RBBI_DEBUG
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) {
        RBBIDebugPuts("Parse tree after flattening Unicode Set references.");
        fTree->printTree(TRUE);
    }
#endif


    //
    // calculate the functions nullable, firstpos, lastpos and followpos on
    // nodes in the parse tree.
    //    See the alogrithm description in Aho.
    //    Understanding how this works by looking at the code alone will be
    //       nearly impossible.
    //
    calcNullable(fTree);
    calcFirstPos(fTree);
    calcLastPos(fTree);
    calcFollowPos(fTree);
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) {
        RBBIDebugPuts("\n");
        printPosSets(fTree);
    }

    //
    //  For "chained" rules, modify the followPos sets
    //
    if (fRB->fChainRules) {
        calcChainedFollowPos(fTree);
    }

    //
    //  BOF (start of input) test fixup.
    //
    if (fRB->fSetBuilder->sawBOF()) {
        bofFixup();
    }

    //
    // Build the DFA state transition tables.
    //
    buildStateTable();
    flagAcceptingStates();
    flagLookAheadStates();
    flagTaggedStates();

    //
    // Update the global table of rule status {tag} values
    // The rule builder has a global vector of status values that are common
    //    for all tables.  Merge the ones from this table into the global set.
    //
    mergeRuleStatusVals();

    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "states")) {printStates();};
}
Beispiel #2
0
//-----------------------------------------------------------------------------
//
//   RBBITableBuilder::build  -  This is the main function for building the DFA state transtion
//                               table from the RBBI rules parse tree.
//
//-----------------------------------------------------------------------------
void  RBBITableBuilder::build() {

    if (U_FAILURE(*fStatus)) {
        return;
    }

    // If there were no rules, just return.  This situation can easily arise
    //   for the reverse rules.
    if (fTree==NULL) {
        return;
    }

    //
    // Walk through the tree, replacing any references to $variables with a copy of the
    //   parse tree for the substition expression.
    //
    fTree = fTree->flattenVariables();
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) {
        RBBIDebugPrintf("Parse tree after flattening variable references.\n");
        fTree->printTree(TRUE);
    }

    //
    // Add a unique right-end marker to the expression.
    //   Appears as a cat-node, left child being the original tree,
    //   right child being the end marker.
    //
    RBBINode *cn = new RBBINode(RBBINode::opCat);
    cn->fLeftChild = fTree;
    fTree->fParent = cn;
    cn->fRightChild = new RBBINode(RBBINode::endMark);
    cn->fRightChild->fParent = cn;
    fTree = cn;

    //
    //  Replace all references to UnicodeSets with the tree for the equivalent
    //      expression.
    //
    fTree->flattenSets();
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) {
        RBBIDebugPrintf("Parse tree after flattening Unicode Set references.\n");
        fTree->printTree(TRUE);
    }


    //
    // calculate the functions nullable, firstpos, lastpos and followpos on
    // nodes in the parse tree.
    //    See the alogrithm description in Aho.
    //    Understanding how this works by looking at the code alone will be
    //       nearly impossible.
    //
    calcNullable(fTree);
    calcFirstPos(fTree);
    calcLastPos(fTree);
    calcFollowPos(fTree);
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) {
        RBBIDebugPrintf("\n\n");
        printPosSets(fTree);
    }

    //
    //  For "chained" rules, modify the followPos sets
    //
    if (fRB->fChainRules) {
        calcChainedFollowPos(fTree);
    }

    //
    // Build the DFA state transition tables.
    //
    buildStateTable();
    flagAcceptingStates();
    flagLookAheadStates();
    flagTaggedStates();

    //
    // Update the global table of rule status {tag} values
    // The rule builder has a global vector of status values that are common
    //    for all tables.  Merge the ones from this table into the global set.
    //
    mergeRuleStatusVals();

    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "states")) {printStates();};
}