static AvoidanceReasonFlags canUseForText(const CharacterType* text, unsigned length, const Font& font, FallThrough fallthrough)
{
    AvoidanceReasonFlags reasons = NoReason;
    // FIXME: <textarea maxlength=0> generates empty text node.
    if (!length)
        SET_REASON_AND_RETURN_IF_NEEDED(reasons, FlowTextIsEmpty, fallthrough);

    for (unsigned i = 0; i < length; ++i) {
        UChar character = text[i];
        if (character == ' ')
            continue;

        // These would be easy to support.
        if (character == noBreakSpace)
            SET_REASON_AND_RETURN_IF_NEEDED(reasons, FlowTextHasNoBreakSpace, fallthrough);
        if (character == softHyphen)
            SET_REASON_AND_RETURN_IF_NEEDED(reasons, FlowTextHasSoftHyphen, fallthrough);

        UCharDirection direction = u_charDirection(character);
        if (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC
            || direction == U_RIGHT_TO_LEFT_EMBEDDING || direction == U_RIGHT_TO_LEFT_OVERRIDE
            || direction == U_LEFT_TO_RIGHT_EMBEDDING || direction == U_LEFT_TO_RIGHT_OVERRIDE
            || direction == U_POP_DIRECTIONAL_FORMAT || direction == U_BOUNDARY_NEUTRAL)
            SET_REASON_AND_RETURN_IF_NEEDED(reasons, FlowTextHasDirectionCharacter, fallthrough);

        if (!font.glyphForCharacter(character))
            SET_REASON_AND_RETURN_IF_NEEDED(reasons, FlowFontIsMissingGlyph, fallthrough);
    }
    return reasons;
}
/* static */
bool BoxChar::ContainsMostlyRTL(const vector<BoxChar*>& boxes) {
  int num_rtl = 0, num_ltr = 0;
  for (int i = 0; i < boxes.size(); ++i) {
    // Convert the unichar to UTF32 representation
    GenericVector<char32> uni_vector;
    if (!UNICHAR::UTF8ToUnicode(boxes[i]->ch_.c_str(), &uni_vector)) {
      tprintf("Illegal utf8 in boxchar %d string:%s = ", i,
              boxes[i]->ch_.c_str());
      for (int c = 0; c < boxes[i]->ch_.size(); ++c) {
        tprintf(" 0x%x", boxes[i]->ch_[c]);
      }
      tprintf("\n");
      continue;
    }
    for (int j = 0; j < uni_vector.size(); ++j) {
      UCharDirection dir = u_charDirection(uni_vector[j]);
      if (dir == U_RIGHT_TO_LEFT || dir == U_RIGHT_TO_LEFT_ARABIC ||
          dir == U_ARABIC_NUMBER) {
        ++num_rtl;
      } else {
        ++num_ltr;
      }
    }
  }
  return num_rtl > num_ltr;
}
static bool canUseForText(const CharacterType* text, unsigned length, const SimpleFontData& fontData)
{
    // FIXME: <textarea maxlength=0> generates empty text node.
    if (!length)
        return false;
    for (unsigned i = 0; i < length; ++i) {
        UChar character = text[i];
        if (character == ' ')
            continue;

        // These would be easy to support.
        if (character == noBreakSpace)
            return false;
        if (character == softHyphen)
            return false;

        UCharDirection direction = u_charDirection(character);
        if (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC
            || direction == U_RIGHT_TO_LEFT_EMBEDDING || direction == U_RIGHT_TO_LEFT_OVERRIDE
            || direction == U_LEFT_TO_RIGHT_EMBEDDING || direction == U_LEFT_TO_RIGHT_OVERRIDE
            || direction == U_POP_DIRECTIONAL_FORMAT || direction == U_BOUNDARY_NEUTRAL)
            return false;

        if (!fontData.glyphForCharacter(character))
            return false;
    }
    return true;
}
static bool isOneLeftToRightRun(const TextRun& run)
{
    for (int i = 0; i < run.length(); i++) {
        UCharDirection direction = u_charDirection(run[i]);
        if (direction == U_RIGHT_TO_LEFT || direction > U_OTHER_NEUTRAL)
            return false;
    }
    return true;
}
static void getDirectionalities(JNIEnv* env, jobject obj, jcharArray srcArray, jbyteArray destArray, int count)
{
    jchar* src = env->GetCharArrayElements(srcArray, NULL);
    jbyte* dest = env->GetByteArrayElements(destArray, NULL);
    if (src == NULL || dest == NULL) {
        jniThrowException(env, "java/lang/NullPointerException", NULL);
        goto DIRECTION_END;
    }

    if (env->GetArrayLength(srcArray) < count || env->GetArrayLength(destArray) < count) {
        jniThrowException(env, "java/lang/ArrayIndexOutOfBoundsException", NULL);
        goto DIRECTION_END;
    }

    for (int i = 0; i < count; i++) {
        if (src[i] >= 0xD800 && src[i] <= 0xDBFF &&
            i + 1 < count &&
            src[i + 1] >= 0xDC00 && src[i + 1] <= 0xDFFF) {
            int c = 0x00010000 + ((src[i] - 0xD800) << 10) +
                                 (src[i + 1] & 0x3FF);
            int dir = u_charDirection(c);
            if (dir < 0 || dir >= U_CHAR_DIRECTION_COUNT)
                dir = PROPERTY_UNDEFINED;
            else
                dir = directionality_map[dir];

            dest[i++] = dir;
            dest[i] = dir;
        } else {
            int c = src[i];
            int dir = u_charDirection(c);
            if (dir < 0 || dir >= U_CHAR_DIRECTION_COUNT)
                dest[i] = PROPERTY_UNDEFINED;
            else
                dest[i] = directionality_map[dir];
        }
    }
    
DIRECTION_END:
    env->ReleaseCharArrayElements(srcArray, src, JNI_ABORT);
    env->ReleaseByteArrayElements(destArray, dest, JNI_ABORT);
}
Beispiel #6
0
 virtual void call(UErrorCode* pErrorCode) {
     const UChar *buffer=testcase.getBuffer();
     int32_t length=testcase.getBufferLen();
     UChar32 c;
     int32_t i;
     uint32_t bitSet=0;
     for(i=0; i<length;) {
         U16_NEXT(buffer, i, length, c);
         bitSet|=(uint32_t)1<<u_charDirection(c);
     }
     if(length>0 && bitSet==0) {
         fprintf(stderr, "error: GetBiDiClass() did not collect bits\n");
     }
 }
Beispiel #7
0
void VTTCue::determineTextDirection()
{
    DEPRECATED_DEFINE_STATIC_LOCAL(const String, rtTag, (ASCIILiteral("rt")));
    createWebVTTNodeTree();
    if (!m_webVTTNodeTree)
        return;

    // Apply the Unicode Bidirectional Algorithm's Paragraph Level steps to the
    // concatenation of the values of each WebVTT Text Object in nodes, in a
    // pre-order, depth-first traversal, excluding WebVTT Ruby Text Objects and
    // their descendants.
    StringBuilder paragraphBuilder;
    for (Node* node = m_webVTTNodeTree->firstChild(); node; node = NodeTraversal::next(node, m_webVTTNodeTree.get())) {
        // FIXME: The code does not match the comment above. This does not actually exclude Ruby Text Object descendant.
        if (!node->isTextNode() || node->localName() == rtTag)
            continue;

        paragraphBuilder.append(node->nodeValue());
    }

    String paragraph = paragraphBuilder.toString();
    if (!paragraph.length())
        return;

    for (size_t i = 0; i < paragraph.length(); ++i) {
        UChar current = paragraph[i];
        if (!current || isCueParagraphSeparator(current))
            return;

        if (UChar current = paragraph[i]) {
            UCharDirection charDirection = u_charDirection(current);
            if (charDirection == U_LEFT_TO_RIGHT) {
                m_displayDirection = CSSValueLtr;
                return;
            }
            if (charDirection == U_RIGHT_TO_LEFT || charDirection == U_RIGHT_TO_LEFT_ARABIC) {
                m_displayDirection = CSSValueRtl;
                return;
            }
        }
    }
}
Beispiel #8
0
// Increments *num_rtl and *num_ltr according to the directionality of
// characters in the box.
void BoxChar::GetDirection(int* num_rtl, int* num_ltr) const {
  // Convert the unichar to UTF32 representation
  std::vector<char32> uni_vector = UNICHAR::UTF8ToUTF32(ch_.c_str());
  if (uni_vector.empty()) {
    tprintf("Illegal utf8 in boxchar string:%s = ", ch_.c_str());
    for (int c = 0; c < ch_.size(); ++c) {
      tprintf(" 0x%x", ch_[c]);
    }
    tprintf("\n");
    return;
  }
  for (char32 ch : uni_vector) {
    UCharDirection dir = u_charDirection(ch);
    if (dir == U_RIGHT_TO_LEFT || dir == U_RIGHT_TO_LEFT_ARABIC ||
        dir == U_ARABIC_NUMBER || dir == U_RIGHT_TO_LEFT_ISOLATE) {
      ++*num_rtl;
    } else if (dir != U_DIR_NON_SPACING_MARK && dir != U_BOUNDARY_NEUTRAL) {
      ++*num_ltr;
    }
  }
}
Beispiel #9
0
static void
printProps(UChar32 codePoint) {
    char buffer[100];
    UErrorCode errorCode;

    /* get the character name */
    errorCode=U_ZERO_ERROR;
    u_charName(codePoint, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);

    /* print the code point and the character name */
    printf("U+%04lx\t%s\n", codePoint, buffer);

    /* print some properties */
    printf("  general category (numeric enum value): %u\n", u_charType(codePoint));

    /* note: these APIs do not provide the data from SpecialCasing.txt */
    printf("  is lowercase: %d  uppercase: U+%04lx\n", u_islower(codePoint), u_toupper(codePoint));

    printf("  is digit: %d  decimal digit value: %d\n", u_isdigit(codePoint), u_charDigitValue(codePoint));

    printf("  BiDi directional category (numeric enum value): %u\n", u_charDirection(codePoint));
}
Beispiel #10
0
bool canUseFor(const RenderBlockFlow& flow)
{
#if !PLATFORM(MAC)
    // FIXME: Non-mac platforms are hitting ASSERT(run.charactersLength() >= run.length())
    // https://bugs.webkit.org/show_bug.cgi?id=123338
    return false;
#endif
    if (!flow.firstChild())
        return false;
    // This currently covers <blockflow>#text</blockflow> case.
    // The <blockflow><inline>#text</inline></blockflow> case is also popular and should be relatively easy to cover.
    if (flow.firstChild() != flow.lastChild())
        return false;
    if (!flow.firstChild()->isText())
        return false;
    // Supporting floats would be very beneficial.
    if (flow.containsFloats())
        return false;
    if (!flow.isHorizontalWritingMode())
        return false;
    if (flow.flowThreadState() != RenderObject::NotInsideFlowThread)
        return false;
    if (flow.hasOutline())
        return false;
    if (flow.isRubyText() || flow.isRubyBase())
        return false;
    // These tests only works during layout. Outside layout this function may give false positives.
    if (flow.view().layoutState()) {
#if ENABLE(CSS_SHAPES)
        if (flow.view().layoutState()->shapeInsideInfo())
            return false;
#endif
        if (flow.view().layoutState()->m_columnInfo)
            return false;
    }
    const RenderStyle& style = *flow.style();
    // It shoudn't be hard to support other alignments.
    if (style.textAlign() != LEFT && style.textAlign() != WEBKIT_LEFT && style.textAlign() != TASTART)
        return false;
    // Non-visible overflow should be pretty easy to support.
    if (style.overflowX() != OVISIBLE || style.overflowY() != OVISIBLE)
        return false;
    // Pre/no-wrap would be very helpful to support.
    if (style.whiteSpace() != NORMAL)
        return false;
    if (!style.textIndent().isZero())
        return false;
    if (style.wordSpacing() || style.letterSpacing())
        return false;
    if (style.textTransform() != TTNONE)
        return false;
    if (!style.isLeftToRightDirection())
        return false;
    if (style.lineBoxContain() != RenderStyle::initialLineBoxContain())
        return false;
    if (style.writingMode() != TopToBottomWritingMode)
        return false;
    if (style.lineBreak() != LineBreakAuto)
        return false;
    if (style.wordBreak() != NormalWordBreak)
        return false;
    if (style.unicodeBidi() != UBNormal || style.rtlOrdering() != LogicalOrder)
        return false;
    if (style.lineAlign() != LineAlignNone || style.lineSnap() != LineSnapNone)
        return false;
    if (style.hyphens() == HyphensAuto)
        return false;
    if (style.textEmphasisFill() != TextEmphasisFillFilled || style.textEmphasisMark() != TextEmphasisMarkNone)
        return false;
    if (style.textShadow())
        return false;
#if ENABLE(CSS_SHAPES)
    if (style.resolvedShapeInside())
        return true;
#endif
    if (style.textOverflow() || (flow.isAnonymousBlock() && flow.parent()->style()->textOverflow()))
        return false;
    if (style.hasPseudoStyle(FIRST_LINE) || style.hasPseudoStyle(FIRST_LETTER))
        return false;
    if (style.hasTextCombine())
        return false;
    if (style.overflowWrap() != NormalOverflowWrap)
        return false;
    if (style.backgroundClip() == TextFillBox)
        return false;
    if (style.borderFit() == BorderFitLines)
        return false;
    const RenderText& textRenderer = toRenderText(*flow.firstChild());
    if (textRenderer.isCombineText() || textRenderer.isCounter() || textRenderer.isQuote() || textRenderer.isTextFragment()
#if ENABLE(SVG)
        || textRenderer.isSVGInlineText()
#endif
        )
        return false;
    if (style.font().codePath(TextRun(textRenderer.text())) != Font::Simple)
        return false;

    auto primaryFontData = style.font().primaryFont();

    unsigned length = textRenderer.textLength();
    unsigned consecutiveSpaceCount = 0;
    for (unsigned i = 0; i < length; ++i) {
        // This rejects anything with more than one consecutive whitespace, except at the beginning or end.
        // This is because we don't currently do subruns within lines. Fixing this would improve coverage significantly.
        UChar character = textRenderer.characterAt(i);
        if (isWhitespace(character))
            ++consecutiveSpaceCount;
        else {
            if (consecutiveSpaceCount != i && consecutiveSpaceCount > 1)
                return false;
            consecutiveSpaceCount = 0;
        }
        // These would be easy to support.
        if (character == noBreakSpace)
            return false;
        if (character == softHyphen)
            return false;

        static const UChar lowestRTLCharacter = 0x590;
        if (character >= lowestRTLCharacter) {
            UCharDirection direction = u_charDirection(character);
            if (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC
                || direction == U_RIGHT_TO_LEFT_EMBEDDING || direction == U_RIGHT_TO_LEFT_OVERRIDE
                || direction == U_LEFT_TO_RIGHT_EMBEDDING || direction == U_LEFT_TO_RIGHT_OVERRIDE)
                return false;
        }
        if (!primaryFontData->glyphForCharacter(character))
            return false;
    }
    return true;
}
Beispiel #11
0
/*
 * Get the directional properties for the text,
 * calculate the flags bit-set, and
 * determine the partagraph level if necessary.
 */
static void
getDirProps(UBiDi *pBiDi, const UChar *text) {
    DirProp *dirProps=pBiDi->dirPropsMemory;    /* pBiDi->dirProps is const */

    int32_t i=0, i0, i1, length=pBiDi->length;
    Flags flags=0;      /* collect all directionalities in the text */
    UChar uchar;
    DirProp dirProp;

    if(IS_DEFAULT_LEVEL(pBiDi->paraLevel)) {
        /* determine the paragraph level (P2..P3) */
        for(;;) {
            uchar=text[i];
            if(!IS_FIRST_SURROGATE(uchar) || i+1==length || !IS_SECOND_SURROGATE(text[i+1])) {
                /* not a surrogate pair */
                flags|=DIRPROP_FLAG(dirProps[i]=dirProp=u_charDirection(uchar));
            } else {
                /* a surrogate pair */
                dirProps[i++]=BN;   /* first surrogate in the pair gets the BN type */
                flags|=DIRPROP_FLAG(dirProps[i]=dirProp=u_surrogatePairDirection(uchar, text[i]))|DIRPROP_FLAG(BN);
            }
            ++i;
            if(dirProp==L) {
                pBiDi->paraLevel=0;
                break;
            } else if(dirProp==R || dirProp==AL) {
                pBiDi->paraLevel=1;
                break;
            } else if(i>=length) {
                /*
                 * see comment in ubidi.h:
                 * the DEFAULT_XXX values are designed so that
                 * their bit 0 alone yields the intended default
                 */
                pBiDi->paraLevel&=1;
                break;
            }
        }
    } else {
        flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
    }

    /* get the rest of the directional properties and the flags bits */
    while(i<length) {
        uchar=text[i];
        if(!IS_FIRST_SURROGATE(uchar) || i+1==length || !IS_SECOND_SURROGATE(text[i+1])) {
            /* not a surrogate pair */
            flags|=DIRPROP_FLAG(dirProps[i]=u_charDirection(uchar));
        } else {
            /* a surrogate pair */
            dirProps[i++]=BN;   /* first surrogate in the pair gets the BN type */
            flags|=DIRPROP_FLAG(dirProps[i]=dirProp=u_surrogatePairDirection(uchar, text[i]))|DIRPROP_FLAG(BN);
        }
        ++i;
    }
    if(flags&MASK_EMBEDDING) {
        flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
    }

    pBiDi->flags=flags;
}
Beispiel #12
0
 UCharDirection direction() const { return atEnd() ? U_OTHER_NEUTRAL : u_charDirection(current()); }
bool canUseFor(const RenderBlockFlow& flow)
{
#if !PLATFORM(MAC) && !PLATFORM(GTK) && !PLATFORM(EFL)
    // FIXME: Non-mac platforms are hitting ASSERT(run.charactersLength() >= run.length())
    // https://bugs.webkit.org/show_bug.cgi?id=123338
    return false;
#endif
    if (!flow.frame().settings().simpleLineLayoutEnabled())
        return false;
    if (!flow.firstChild())
        return false;
    // This currently covers <blockflow>#text</blockflow> case.
    // The <blockflow><inline>#text</inline></blockflow> case is also popular and should be relatively easy to cover.
    if (flow.firstChild() != flow.lastChild())
        return false;
    if (!flow.firstChild()->isText())
        return false;
    // Supporting floats would be very beneficial.
    if (flow.containsFloats())
        return false;
    if (!flow.isHorizontalWritingMode())
        return false;
    if (flow.flowThreadState() != RenderObject::NotInsideFlowThread)
        return false;
    if (flow.hasOutline())
        return false;
    if (flow.isRubyText() || flow.isRubyBase())
        return false;
    if (flow.parent()->isDeprecatedFlexibleBox())
        return false;
    // These tests only works during layout. Outside layout this function may give false positives.
    if (flow.view().layoutState()) {
#if ENABLE(CSS_SHAPES)
        if (flow.view().layoutState()->shapeInsideInfo())
            return false;
#endif
        if (flow.view().layoutState()->m_columnInfo)
            return false;
    }
    const RenderStyle& style = flow.style();
    if (style.textDecorationsInEffect() != TextDecorationNone)
        return false;
    if (style.textAlign() == JUSTIFY)
        return false;
    // Non-visible overflow should be pretty easy to support.
    if (style.overflowX() != OVISIBLE || style.overflowY() != OVISIBLE)
        return false;
    // Pre/no-wrap would be very helpful to support.
    if (style.whiteSpace() != NORMAL)
        return false;
    if (!style.textIndent().isZero())
        return false;
    if (style.wordSpacing() || style.letterSpacing())
        return false;
    if (style.textTransform() != TTNONE)
        return false;
    if (!style.isLeftToRightDirection())
        return false;
    if (style.lineBoxContain() != RenderStyle::initialLineBoxContain())
        return false;
    if (style.writingMode() != TopToBottomWritingMode)
        return false;
    if (style.lineBreak() != LineBreakAuto)
        return false;
    if (style.wordBreak() != NormalWordBreak)
        return false;
    if (style.unicodeBidi() != UBNormal || style.rtlOrdering() != LogicalOrder)
        return false;
    if (style.lineAlign() != LineAlignNone || style.lineSnap() != LineSnapNone)
        return false;
    if (style.hyphens() == HyphensAuto)
        return false;
    if (style.textEmphasisFill() != TextEmphasisFillFilled || style.textEmphasisMark() != TextEmphasisMarkNone)
        return false;
    if (style.textShadow())
        return false;
#if ENABLE(CSS_SHAPES)
    if (style.resolvedShapeInside())
        return true;
#endif
    if (style.textOverflow() || (flow.isAnonymousBlock() && flow.parent()->style().textOverflow()))
        return false;
    if (style.hasPseudoStyle(FIRST_LINE) || style.hasPseudoStyle(FIRST_LETTER))
        return false;
    if (style.hasTextCombine())
        return false;
    if (style.overflowWrap() != NormalOverflowWrap)
        return false;
    if (style.backgroundClip() == TextFillBox)
        return false;
    if (style.borderFit() == BorderFitLines)
        return false;
    const RenderText& textRenderer = toRenderText(*flow.firstChild());
    if (textRenderer.isCombineText() || textRenderer.isCounter() || textRenderer.isQuote() || textRenderer.isTextFragment()
#if ENABLE(SVG)
        || textRenderer.isSVGInlineText()
#endif
        )
        return false;
    if (style.font().codePath(TextRun(textRenderer.text())) != Font::Simple)
        return false;

    // We assume that all lines have metrics based purely on the primary font.
    auto& primaryFontData = *style.font().primaryFont();
    if (primaryFontData.isLoading())
        return false;

    unsigned length = textRenderer.textLength();
    for (unsigned i = 0; i < length; ++i) {
        UChar character = textRenderer.characterAt(i);
        if (character == ' ')
            continue;

        // These would be easy to support.
        if (character == noBreakSpace)
            return false;
        if (character == softHyphen)
            return false;

        UCharDirection direction = u_charDirection(character);
        if (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC
            || direction == U_RIGHT_TO_LEFT_EMBEDDING || direction == U_RIGHT_TO_LEFT_OVERRIDE
            || direction == U_LEFT_TO_RIGHT_EMBEDDING || direction == U_LEFT_TO_RIGHT_OVERRIDE
            || direction == U_POP_DIRECTIONAL_FORMAT || direction == U_BOUNDARY_NEUTRAL)
            return false;

        if (!primaryFontData.glyphForCharacter(character))
            return false;
    }
    return true;
}
Beispiel #14
0
static int utf8_codepoint_is_left_to_right(UChar32 c) {
    return (u_charDirection(c) == U_LEFT_TO_RIGHT);
}
static jbyte Character_getDirectionalityImpl(JNIEnv*, jclass, jint codePoint) {
    return u_charDirection(codePoint);
}
Beispiel #16
0
U_CAPI int32_t U_EXPORT2
u_getIntPropertyValue(UChar32 c, UProperty which) {
    UErrorCode errorCode;

    if(which<UCHAR_BINARY_START) {
        return 0; /* undefined */
    } else if(which<UCHAR_BINARY_LIMIT) {
        return (int32_t)u_hasBinaryProperty(c, which);
    } else if(which<UCHAR_INT_START) {
        return 0; /* undefined */
    } else if(which<UCHAR_INT_LIMIT) {
        switch(which) {
        case UCHAR_BIDI_CLASS:
            return (int32_t)u_charDirection(c);
        case UCHAR_BLOCK:
            return (int32_t)ublock_getCode(c);
#if !UCONFIG_NO_NORMALIZATION
        case UCHAR_CANONICAL_COMBINING_CLASS:
            return u_getCombiningClass(c);
#endif
        case UCHAR_DECOMPOSITION_TYPE:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_DT_MASK);
        case UCHAR_EAST_ASIAN_WIDTH:
            return (int32_t)(u_getUnicodeProperties(c, 0)&UPROPS_EA_MASK)>>UPROPS_EA_SHIFT;
        case UCHAR_GENERAL_CATEGORY:
            return (int32_t)u_charType(c);
        case UCHAR_JOINING_GROUP:
            return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c);
        case UCHAR_JOINING_TYPE:
            return ubidi_getJoiningType(GET_BIDI_PROPS(), c);
        case UCHAR_LINE_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, UPROPS_LB_VWORD)&UPROPS_LB_MASK)>>UPROPS_LB_SHIFT;
        case UCHAR_NUMERIC_TYPE: {
            int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getUnicodeProperties(c, -1));
            return UPROPS_NTV_GET_TYPE(ntv);
        }
        case UCHAR_SCRIPT:
            errorCode=U_ZERO_ERROR;
            return (int32_t)uscript_getScript(c, &errorCode);
        case UCHAR_HANGUL_SYLLABLE_TYPE: {
            /* see comments on gcbToHst[] above */
            int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
            if(gcb<LENGTHOF(gcbToHst)) {
                return gcbToHst[gcb];
            } else {
                return U_HST_NOT_APPLICABLE;
            }
        }
#if !UCONFIG_NO_NORMALIZATION
        case UCHAR_NFD_QUICK_CHECK:
        case UCHAR_NFKD_QUICK_CHECK:
        case UCHAR_NFC_QUICK_CHECK:
        case UCHAR_NFKC_QUICK_CHECK:
            return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD));
        case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
            return getFCD16(c)>>8;
        case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
            return getFCD16(c)&0xff;
#endif
        case UCHAR_GRAPHEME_CLUSTER_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
        case UCHAR_SENTENCE_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_SB_MASK)>>UPROPS_SB_SHIFT;
        case UCHAR_WORD_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_WB_MASK)>>UPROPS_WB_SHIFT;
        default:
            return 0; /* undefined */
        }
    } else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
Beispiel #17
0
//static jbyte Character_getDirectionalityImpl(JNIEnv*, jclass, jint codePoint) {
JNIEXPORT jbyte JNICALL
Java_java_lang_Character_getDirectionalityImpl(JNIEnv*, jclass, jint codePoint) {
    return u_charDirection(codePoint);
}
Beispiel #18
0
UCharDirection __hs_u_charDirection(UChar32 c)
{
    return u_charDirection(c);
}
int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength, 
                                    UChar* dest, int32_t destCapacity, 
                                    UBool allowUnassigned,
                                    UParseError* parseError,
                                    UErrorCode& status ){
    // check error status
    if(U_FAILURE(status)){
        return 0;
    }
    
    //check arguments
    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UChar b1Stack[MAX_BUFFER_SIZE];
    UChar *b1 = b1Stack;
    int32_t b1Len,b1Capacity = MAX_BUFFER_SIZE;

    int32_t b1Index = 0;
    UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    UBool leftToRight=FALSE, rightToLeft=FALSE;

    b1Len = map(src,srcLength, b1, b1Capacity,allowUnassigned,parseError, status);

    if(status == U_BUFFER_OVERFLOW_ERROR){
        // redo processing of string
        /* we do not have enough room so grow the buffer*/
        if(!u_growBufferFromStatic(b1Stack,&b1,&b1Capacity,b1Len,0)){
            status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        status = U_ZERO_ERROR; // reset error
        
        b1Len = map(src,srcLength, b1, b1Len,allowUnassigned, parseError, status);
        
    }

    if(U_FAILURE(status)){
        goto CLEANUP;
    }


    for(; b1Index<b1Len; ){

        UChar32 ch = 0;

        U16_NEXT(b1, b1Index, b1Len, ch);

        if(prohibited.contains(ch) && ch!=0x0020){
            status = U_IDNA_PROHIBITED_ERROR;
            goto CLEANUP;
        }

        direction = u_charDirection(ch);
        if(firstCharDir==U_CHAR_DIRECTION_COUNT){
            firstCharDir = direction;
        }
        if(direction == U_LEFT_TO_RIGHT){
            leftToRight = TRUE;
        }
        if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
            rightToLeft = TRUE;
        }
    }       
    
    // satisfy 2
    if( leftToRight == TRUE && rightToLeft == TRUE){
        status = U_IDNA_CHECK_BIDI_ERROR;
        goto CLEANUP;
    }

    //satisfy 3
    if( rightToLeft == TRUE && 
        !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
          (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
       ){
        status = U_IDNA_CHECK_BIDI_ERROR;
        return FALSE;
    }

    if(b1Len <= destCapacity){
        uprv_memmove(dest,b1, b1Len*U_SIZEOF_UCHAR);
    }

CLEANUP:
    if(b1!=b1Stack){
        uprv_free(b1);
    }

    return u_terminateUChars(dest, destCapacity, b1Len, &status);
}
Beispiel #20
0
jbyte fastiva_vm_Character_C$__getDirectionalityImpl(jint codePoint) {
    return u_charDirection(codePoint);
}
// Helper sets the character attribute properties and sets up the script table.
// Does not set tops and bottoms.
void SetupBasicProperties(bool report_errors, bool decompose,
                          UNICHARSET* unicharset) {
  for (int unichar_id = 0; unichar_id < unicharset->size(); ++unichar_id) {
    // Convert any custom ligatures.
    const char* unichar_str = unicharset->id_to_unichar(unichar_id);
    for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr; ++i) {
      if (!strcmp(UNICHARSET::kCustomLigatures[i][1], unichar_str)) {
        unichar_str = UNICHARSET::kCustomLigatures[i][0];
        break;
      }
    }

    // Convert the unichar to UTF32 representation
    std::vector<char32> uni_vector = UNICHAR::UTF8ToUTF32(unichar_str);

    // Assume that if the property is true for any character in the string,
    // then it holds for the whole "character".
    bool unichar_isalpha = false;
    bool unichar_islower = false;
    bool unichar_isupper = false;
    bool unichar_isdigit = false;
    bool unichar_ispunct = false;

    for (char32 u_ch : uni_vector) {
      if (u_isalpha(u_ch)) unichar_isalpha = true;
      if (u_islower(u_ch)) unichar_islower = true;
      if (u_isupper(u_ch)) unichar_isupper = true;
      if (u_isdigit(u_ch)) unichar_isdigit = true;
      if (u_ispunct(u_ch)) unichar_ispunct = true;
    }

    unicharset->set_isalpha(unichar_id, unichar_isalpha);
    unicharset->set_islower(unichar_id, unichar_islower);
    unicharset->set_isupper(unichar_id, unichar_isupper);
    unicharset->set_isdigit(unichar_id, unichar_isdigit);
    unicharset->set_ispunctuation(unichar_id, unichar_ispunct);

    tesseract::IcuErrorCode err;
    unicharset->set_script(unichar_id, uscript_getName(
        uscript_getScript(uni_vector[0], err)));

    const int num_code_points = uni_vector.size();
    // Obtain the lower/upper case if needed and record it in the properties.
    unicharset->set_other_case(unichar_id, unichar_id);
    if (unichar_islower || unichar_isupper) {
      std::vector<char32> other_case(num_code_points, 0);
      for (int i = 0; i < num_code_points; ++i) {
        // TODO(daria): Ideally u_strToLower()/ustrToUpper() should be used.
        // However since they deal with UChars (so need a conversion function
        // from char32 or UTF8string) and require a meaningful locale string,
        // for now u_tolower()/u_toupper() are used.
        other_case[i] = unichar_islower ? u_toupper(uni_vector[i]) :
          u_tolower(uni_vector[i]);
      }
      std::string other_case_uch = UNICHAR::UTF32ToUTF8(other_case);
      UNICHAR_ID other_case_id =
          unicharset->unichar_to_id(other_case_uch.c_str());
      if (other_case_id != INVALID_UNICHAR_ID) {
        unicharset->set_other_case(unichar_id, other_case_id);
      } else if (unichar_id >= SPECIAL_UNICHAR_CODES_COUNT && report_errors) {
        tprintf("Other case %s of %s is not in unicharset\n",
                other_case_uch.c_str(), unichar_str);
      }
    }

    // Set RTL property and obtain mirror unichar ID from ICU.
    std::vector<char32> mirrors(num_code_points, 0);
    for (int i = 0; i < num_code_points; ++i) {
      mirrors[i] = u_charMirror(uni_vector[i]);
      if (i == 0) {  // set directionality to that of the 1st code point
        unicharset->set_direction(unichar_id,
                                  static_cast<UNICHARSET::Direction>(
                                      u_charDirection(uni_vector[i])));
      }
    }
    std::string mirror_uch = UNICHAR::UTF32ToUTF8(mirrors);
    UNICHAR_ID mirror_uch_id = unicharset->unichar_to_id(mirror_uch.c_str());
    if (mirror_uch_id != INVALID_UNICHAR_ID) {
      unicharset->set_mirror(unichar_id, mirror_uch_id);
    } else if (report_errors) {
      tprintf("Mirror %s of %s is not in unicharset\n",
              mirror_uch.c_str(), unichar_str);
    }

    // Record normalized version of this unichar.
    std::string normed_str;
    if (unichar_id != 0 &&
        tesseract::NormalizeUTF8String(
            decompose ? tesseract::UnicodeNormMode::kNFKD
                      : tesseract::UnicodeNormMode::kNFKC,
            tesseract::OCRNorm::kNormalize, tesseract::GraphemeNorm::kNone,
            unichar_str, &normed_str) &&
        !normed_str.empty()) {
      unicharset->set_normed(unichar_id, normed_str.c_str());
    } else {
      unicharset->set_normed(unichar_id, unichar_str);
    }
    ASSERT_HOST(unicharset->get_other_case(unichar_id) < unicharset->size());
  }
  unicharset->post_load_setup();
}
Beispiel #22
0
JNIEXPORT jint JNICALL Java_java_text_Bidi_nativeGetDirectionCode
  (JNIEnv *env, jclass cls, jchar c) 
{
    return (jint)u_charDirection((UChar)c);
}
Beispiel #23
0
void get_unicode_info(const char* text, const icu::UnicodeString& us, Sqlite::Statement& insert) {
    bool allokay = true;
    for (const char* t = text; *t; ++t) {
        if (!(std::isalnum(*t) || *t == '_' || *t == ':' || *t == ' ' || *t == '.' || *t == '-')) {
            allokay = false;
            break;
        }
    }

    if (allokay) {
        return;
    }

    bool unusual = false;
    for (icu::StringCharacterIterator it(us); it.hasNext(); it.next()) {
        UChar32 codepoint = it.current32();
        int8_t chartype = u_charType(codepoint);
        if (! u_isprint(codepoint)) {
            unusual = true;
            break;
        }
        if (u_charDirection(codepoint) != 0) {
            unusual = true;
            break;
        }
        if (chartype !=  1 && // UPPERCASE_LETTER
            chartype !=  2 && // LOWERCASE_LETTER
            chartype !=  9 && // DECIMAL_DIGIT_NUMBER
            chartype != 12 && // SPACE_SEPARATOR
            chartype != 19 && // DASH_PUNCTUATION
            chartype != 22 && // CONNECTOR_PUNCTUATION
            chartype != 23) { // OTHER_PUNCTUATION
            unusual = true;
            break;
        }
    }

    if (unusual) {
        int num = 0;
        for (icu::StringCharacterIterator it(us); it.hasNext(); it.next(), ++num) {
            UChar32 codepoint = it.current32();

            int8_t chartype = u_charType(codepoint);

            char buffer[100];
            UErrorCode errorCode = U_ZERO_ERROR;
            u_charName(codepoint, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);

            UCharDirection direction = u_charDirection(codepoint);
            int32_t block = u_getIntPropertyValue(codepoint, UCHAR_BLOCK);

            icu::UnicodeString ustr(codepoint);
            std::string str;
            ustr.toUTF8String(str);

            char uplus[10];
            snprintf(uplus, 10, "U+%04x", codepoint);

            insert.
                bind_text(text).
                bind_int(num).
                bind_text(str.c_str()).
                bind_text(uplus).
                bind_int(block).
                bind_text(category_to_string(chartype)).
                bind_int(direction).
                bind_text(buffer).
                execute();
        }
    }
}