Example #1
0
PARROT_EXPORT
void
string_set_data_directory(PARROT_INTERP, ARGIN(const char *dir))
{
    ASSERT_ARGS(string_set_data_directory)
#if PARROT_HAS_ICU
    u_setDataDirectory(dir);

    /* Since u_setDataDirectory doesn't have a result code, we'll spot
       check that everything is okay by making sure that '9' had decimal
       value 9. Using 57 rather than '9' so that the encoding of this
       source code file isn't an issue.... (Don't want to get bitten by
       EBCDIC.) */

    if (!u_isdigit(57) || (u_charDigitValue(57) != 9))
        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_ICU_ERROR,
            "string_set_data_directory: ICU data files not found"
            "(apparently) for directory [%s]", dir);
#else
    UNUSED(dir);

    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_ICU_ERROR,
        "string_set_data_directory: parrot compiled without ICU support");
#endif
}
Example #2
0
static void
printProps(UChar32 codePoint) {
    char buffer[100];
    UErrorCode errorCode;

    /* get the character name */
    errorCode=U_ZERO_ERROR;
    u_charName(codePoint, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);

    /* print the code point and the character name */
    printf("U+%04lx\t%s\n", codePoint, buffer);

    /* print some properties */
    printf("  general category (numeric enum value): %u\n", u_charType(codePoint));

    /* note: these APIs do not provide the data from SpecialCasing.txt */
    printf("  is lowercase: %d  uppercase: U+%04lx\n", u_islower(codePoint), u_toupper(codePoint));

    printf("  is digit: %d  decimal digit value: %d\n", u_isdigit(codePoint), u_charDigitValue(codePoint));

    printf("  BiDi directional category (numeric enum value): %u\n", u_charDirection(codePoint));
}
Example #3
0
UBool NumberingSystem::isValidDigitString(const UnicodeString& str) {

    StringCharacterIterator it(str);
    UChar32 c;
    UChar32 prev = 0;
    int32_t i = 0;

    for ( it.setToStart(); it.hasNext(); ) {
       c = it.next32PostInc();
       if ( u_charDigitValue(c) != i ) {  // Digits outside the Unicode decimal digit class are not currently supported
           return FALSE;
       }
       if ( prev != 0 && c != prev + 1 ) { // Non-contiguous digits are not currently supported
          return FALSE;
       }
       if ( c > 0xFFFF ) { // Digits outside the BMP are not currently supported
          return FALSE;
       }
       i++;
       prev = c;
    }
    return TRUE;   
}
Example #4
0
//----------------------------------------------------------------------------------------
//
//  doParseAction        Do some action during rule parsing.
//                       Called by the parse state machine.
//                       Actions build the parse tree and Unicode Sets,
//                       and maintain the parse stack for nested expressions.
//
//                       TODO:  unify EParseAction and RBBI_RuleParseAction enum types.
//                              They represent exactly the same thing.  They're separate
//                              only to work around enum forward declaration restrictions
//                              in some compilers, while at the same time avoiding multiple
//                              definitions problems.  I'm sure that there's a better way.
//
//----------------------------------------------------------------------------------------
UBool RBBIRuleScanner::doParseActions(EParseAction action)
{
    RBBINode *n       = NULL;

    UBool   returnVal = TRUE;

    switch ((RBBI_RuleParseAction)action) {

    case doExprStart:
        pushNewNode(RBBINode::opStart);
        fRuleNum++;
        break;


    case doExprOrOperator:
    {
        fixOpStack(RBBINode::precOpCat);
        RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
        RBBINode  *orNode      = pushNewNode(RBBINode::opOr);
        orNode->fLeftChild     = operandNode;
        operandNode->fParent   = orNode;
    }
    break;

    case doExprCatOperator:
        // concatenation operator.
        // For the implicit concatenation of adjacent terms in an expression that are
        //   not separated by any other operator.  Action is invoked between the
        //   actions for the two terms.
    {
        fixOpStack(RBBINode::precOpCat);
        RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
        RBBINode  *catNode     = pushNewNode(RBBINode::opCat);
        catNode->fLeftChild    = operandNode;
        operandNode->fParent   = catNode;
    }
    break;

    case doLParen:
        // Open Paren.
        //   The openParen node is a dummy operation type with a low precedence,
        //     which has the affect of ensuring that any real binary op that
        //     follows within the parens binds more tightly to the operands than
        //     stuff outside of the parens.
        pushNewNode(RBBINode::opLParen);
        break;

    case doExprRParen:
        fixOpStack(RBBINode::precLParen);
        break;

    case doNOP:
        break;

    case doStartAssign:
        // We've just scanned "$variable = "
        // The top of the node stack has the $variable ref node.

        // Save the start position of the RHS text in the StartExpression node
        //   that precedes the $variableReference node on the stack.
        //   This will eventually be used when saving the full $variable replacement
        //   text as a string.
        n = fNodeStack[fNodeStackPtr-1];
        n->fFirstPos = fNextIndex;              // move past the '='

        // Push a new start-of-expression node; needed to keep parse of the
        //   RHS expression happy.
        pushNewNode(RBBINode::opStart);
        break;




    case doEndAssign:
    {
        // We have reached the end of an assignement statement.
        //   Current scan char is the ';' that terminates the assignment.

        // Terminate expression, leaves expression parse tree rooted in TOS node.
        fixOpStack(RBBINode::precStart);

        RBBINode *startExprNode  = fNodeStack[fNodeStackPtr-2];
        RBBINode *varRefNode     = fNodeStack[fNodeStackPtr-1];
        RBBINode *RHSExprNode    = fNodeStack[fNodeStackPtr];

        // Save original text of right side of assignment, excluding the terminating ';'
        //  in the root of the node for the right-hand-side expression.
        RHSExprNode->fFirstPos = startExprNode->fFirstPos;
        RHSExprNode->fLastPos  = fScanIndex;
        fRB->fRules.extractBetween(RHSExprNode->fFirstPos, RHSExprNode->fLastPos, RHSExprNode->fText);

        // Expression parse tree becomes l. child of the $variable reference node.
        varRefNode->fLeftChild = RHSExprNode;
        RHSExprNode->fParent   = varRefNode;

        // Make a symbol table entry for the $variableRef node.
        fSymbolTable->addEntry(varRefNode->fText, varRefNode, *fRB->fStatus);
        if (U_FAILURE(*fRB->fStatus)) {
            // This is a round-about way to get the parse position set
            //  so that duplicate symbols error messages include a line number.
            UErrorCode t = *fRB->fStatus;
            *fRB->fStatus = U_ZERO_ERROR;
            error(t);
        }

        // Clean up the stack.
        delete startExprNode;
        fNodeStackPtr-=3;
        break;
    }

    case doEndOfRule:
    {
        fixOpStack(RBBINode::precStart);      // Terminate expression, leaves expression
        if (U_FAILURE(*fRB->fStatus)) {       //   parse tree rooted in TOS node.
            break;
        }
#ifdef RBBI_DEBUG
        if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rtree")) {
            printNodeStack("end of rule");
        }
#endif
        U_ASSERT(fNodeStackPtr == 1);

        // If this rule includes a look-ahead '/', add a endMark node to the
        //   expression tree.
        if (fLookAheadRule) {
            RBBINode  *thisRule       = fNodeStack[fNodeStackPtr];
            RBBINode  *endNode        = pushNewNode(RBBINode::endMark);
            RBBINode  *catNode        = pushNewNode(RBBINode::opCat);
            fNodeStackPtr -= 2;
            catNode->fLeftChild       = thisRule;
            catNode->fRightChild      = endNode;
            fNodeStack[fNodeStackPtr] = catNode;
            endNode->fVal             = fRuleNum;
            endNode->fLookAheadEnd    = TRUE;
        }

        // All rule expressions are ORed together.
        // The ';' that terminates an expression really just functions as a '|' with
        //   a low operator prededence.
        //
        // Each of the four sets of rules are collected separately.
        //  (forward, reverse, safe_forward, safe_reverse)
        //  OR this rule into the appropriate group of them.
        //
        RBBINode **destRules = (fReverseRule? &fRB->fReverseTree : fRB->fDefaultTree);

        if (*destRules != NULL) {
            // This is not the first rule encounted.
            // OR previous stuff  (from *destRules)
            // with the current rule expression (on the Node Stack)
            //  with the resulting OR expression going to *destRules
            //
            RBBINode  *thisRule    = fNodeStack[fNodeStackPtr];
            RBBINode  *prevRules   = *destRules;
            RBBINode  *orNode      = pushNewNode(RBBINode::opOr);
            orNode->fLeftChild     = prevRules;
            prevRules->fParent     = orNode;
            orNode->fRightChild    = thisRule;
            thisRule->fParent      = orNode;
            *destRules             = orNode;
        }
        else
        {
            // This is the first rule encountered (for this direction).
            // Just move its parse tree from the stack to *destRules.
            *destRules = fNodeStack[fNodeStackPtr];
        }
        fReverseRule   = FALSE;   // in preparation for the next rule.
        fLookAheadRule = FALSE;
        fNodeStackPtr  = 0;
    }
    break;


    case doRuleError:
        error(U_BRK_RULE_SYNTAX);
        returnVal = FALSE;
        break;


    case doVariableNameExpectedErr:
        error(U_BRK_RULE_SYNTAX);
        break;


    //
    //  Unary operands  + ? *
    //    These all appear after the operand to which they apply.
    //    When we hit one, the operand (may be a whole sub expression)
    //    will be on the top of the stack.
    //    Unary Operator becomes TOS, with the old TOS as its one child.
    case doUnaryOpPlus:
    {
        RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
        RBBINode  *plusNode    = pushNewNode(RBBINode::opPlus);
        plusNode->fLeftChild   = operandNode;
        operandNode->fParent   = plusNode;
    }
    break;

    case doUnaryOpQuestion:
    {
        RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
        RBBINode  *qNode       = pushNewNode(RBBINode::opQuestion);
        qNode->fLeftChild      = operandNode;
        operandNode->fParent   = qNode;
    }
    break;

    case doUnaryOpStar:
    {
        RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
        RBBINode  *starNode    = pushNewNode(RBBINode::opStar);
        starNode->fLeftChild   = operandNode;
        operandNode->fParent   = starNode;
    }
    break;

    case doRuleChar:
        // A "Rule Character" is any single character that is a literal part
        // of the regular expression.  Like a, b and c in the expression "(abc*) | [:L:]"
        // These are pretty uncommon in break rules; the terms are more commonly
        //  sets.  To keep things uniform, treat these characters like as
        // sets that just happen to contain only one character.
    {
        n = pushNewNode(RBBINode::setRef);
        findSetFor(fC.fChar, n);
        n->fFirstPos = fScanIndex;
        n->fLastPos  = fNextIndex;
        fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
        break;
    }

    case doDotAny:
        // scanned a ".", meaning match any single character.
    {
        n = pushNewNode(RBBINode::setRef);
        findSetFor(kAny, n);
        n->fFirstPos = fScanIndex;
        n->fLastPos  = fNextIndex;
        fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
        break;
    }

    case doSlash:
        // Scanned a '/', which identifies a look-ahead break position in a rule.
        n = pushNewNode(RBBINode::lookAhead);
        n->fVal      = fRuleNum;
        n->fFirstPos = fScanIndex;
        n->fLastPos  = fNextIndex;
        fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
        fLookAheadRule = TRUE;
        break;


    case doStartTagValue:
        // Scanned a '{', the opening delimiter for a tag value within a rule.
        n = pushNewNode(RBBINode::tag);
        n->fVal      = 0;
        n->fFirstPos = fScanIndex;
        n->fLastPos  = fNextIndex;
        break;

    case doTagDigit:
        // Just scanned a decimal digit that's part of a tag value
    {
        n = fNodeStack[fNodeStackPtr];
        uint32_t v = u_charDigitValue(fC.fChar);
        U_ASSERT(v < 10);
        n->fVal = n->fVal*10 + v;
        break;
    }

    case doTagValue:
        n = fNodeStack[fNodeStackPtr];
        n->fLastPos = fNextIndex;
        fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
        break;

    case doTagExpectedError:
        error(U_BRK_MALFORMED_RULE_TAG);
        returnVal = FALSE;
        break;

    case doOptionStart:
        // Scanning a !!option.   At the start of string.
        fOptionStart = fScanIndex;
        break;

    case doOptionEnd:
    {
        UnicodeString opt(fRB->fRules, fOptionStart, fScanIndex-fOptionStart);
        if (opt == UNICODE_STRING("chain", 5)) {
            fRB->fChainRules = TRUE;
        } else if (opt == UNICODE_STRING("LBCMNoChain", 11)) {
            fRB->fLBCMNoChain = TRUE;
        } else if (opt == UNICODE_STRING("forward", 7)) {
            fRB->fDefaultTree   = &fRB->fForwardTree;
        } else if (opt == UNICODE_STRING("reverse", 7)) {
            fRB->fDefaultTree   = &fRB->fReverseTree;
        } else if (opt == UNICODE_STRING("safe_forward", 12)) {
            fRB->fDefaultTree   = &fRB->fSafeFwdTree;
        } else if (opt == UNICODE_STRING("safe_reverse", 12)) {
            fRB->fDefaultTree   = &fRB->fSafeRevTree;
        } else if (opt == UNICODE_STRING("lookAheadHardBreak", 18)) {
            fRB->fLookAheadHardBreak = TRUE;
        } else {
            error(U_BRK_UNRECOGNIZED_OPTION);
        }
    }
    break;

    case doReverseDir:
        fReverseRule = TRUE;
        break;

    case doStartVariableName:
        n = pushNewNode(RBBINode::varRef);
        if (U_FAILURE(*fRB->fStatus)) {
            break;
        }
        n->fFirstPos = fScanIndex;
        break;

    case doEndVariableName:
        n = fNodeStack[fNodeStackPtr];
        if (n==NULL || n->fType != RBBINode::varRef) {
            error(U_BRK_INTERNAL_ERROR);
            break;
        }
        n->fLastPos = fScanIndex;
        fRB->fRules.extractBetween(n->fFirstPos+1, n->fLastPos, n->fText);
        // Look the newly scanned name up in the symbol table
        //   If there's an entry, set the l. child of the var ref to the replacement expression.
        //   (We also pass through here when scanning assignments, but no harm is done, other
        //    than a slight wasted effort that seems hard to avoid.  Lookup will be null)
        n->fLeftChild = fSymbolTable->lookupNode(n->fText);
        break;

    case doCheckVarDef:
        n = fNodeStack[fNodeStackPtr];
        if (n->fLeftChild == NULL) {
            error(U_BRK_UNDEFINED_VARIABLE);
            returnVal = FALSE;
        }
        break;

    case doExprFinished:
        break;

    case doRuleErrorAssignExpr:
        error(U_BRK_ASSIGN_ERROR);
        returnVal = FALSE;
        break;

    case doExit:
        returnVal = FALSE;
        break;

    case doScanUnicodeSet:
        scanSet();
        break;

    default:
        error(U_BRK_INTERNAL_ERROR);
        returnVal = FALSE;
        break;
    }
    return returnVal;
}
Example #5
0
int32_t __hs_u_charDigitValue(UChar32 c)
{
    return u_charDigitValue(c);
}