// Scans |length| characters of |text| and accumulates, in the returned flag set,
// the reasons why the text cannot be handled here: empty text, no-break space,
// soft hyphen, characters with a right-to-left or explicit bidi-control
// direction class, and characters for which |font| reports no glyph.
// NOTE(review): SET_REASON_AND_RETURN_IF_NEEDED is defined elsewhere; judging by
// its name it records the reason and may return early depending on
// |fallthrough| -- confirm against the macro definition.
static AvoidanceReasonFlags canUseForText(const CharacterType* text, unsigned length, const Font& font, FallThrough fallthrough)
{
    AvoidanceReasonFlags reasons = NoReason;

    // FIXME: <textarea maxlength=0> generates empty text node.
    if (!length)
        SET_REASON_AND_RETURN_IF_NEEDED(reasons, FlowTextIsEmpty, fallthrough);

    const CharacterType* const end = text + length;
    for (const CharacterType* cursor = text; cursor != end; ++cursor) {
        UChar character = *cursor;

        // Plain spaces never disqualify the text.
        if (character == ' ')
            continue;

        // These would be easy to support.
        if (character == noBreakSpace)
            SET_REASON_AND_RETURN_IF_NEEDED(reasons, FlowTextHasNoBreakSpace, fallthrough);
        if (character == softHyphen)
            SET_REASON_AND_RETURN_IF_NEEDED(reasons, FlowTextHasSoftHyphen, fallthrough);

        // Anything that would require bidi processing is flagged.
        switch (u_charDirection(character)) {
        case U_RIGHT_TO_LEFT:
        case U_RIGHT_TO_LEFT_ARABIC:
        case U_RIGHT_TO_LEFT_EMBEDDING:
        case U_RIGHT_TO_LEFT_OVERRIDE:
        case U_LEFT_TO_RIGHT_EMBEDDING:
        case U_LEFT_TO_RIGHT_OVERRIDE:
        case U_POP_DIRECTIONAL_FORMAT:
        case U_BOUNDARY_NEUTRAL:
            SET_REASON_AND_RETURN_IF_NEEDED(reasons, FlowTextHasDirectionCharacter, fallthrough);
            break;
        default:
            break;
        }

        if (!font.glyphForCharacter(character))
            SET_REASON_AND_RETURN_IF_NEEDED(reasons, FlowFontIsMissingGlyph, fallthrough);
    }

    return reasons;
}
/* static */ bool BoxChar::ContainsMostlyRTL(const vector<BoxChar*>& boxes) { int num_rtl = 0, num_ltr = 0; for (int i = 0; i < boxes.size(); ++i) { // Convert the unichar to UTF32 representation GenericVector<char32> uni_vector; if (!UNICHAR::UTF8ToUnicode(boxes[i]->ch_.c_str(), &uni_vector)) { tprintf("Illegal utf8 in boxchar %d string:%s = ", i, boxes[i]->ch_.c_str()); for (int c = 0; c < boxes[i]->ch_.size(); ++c) { tprintf(" 0x%x", boxes[i]->ch_[c]); } tprintf("\n"); continue; } for (int j = 0; j < uni_vector.size(); ++j) { UCharDirection dir = u_charDirection(uni_vector[j]); if (dir == U_RIGHT_TO_LEFT || dir == U_RIGHT_TO_LEFT_ARABIC || dir == U_ARABIC_NUMBER) { ++num_rtl; } else { ++num_ltr; } } } return num_rtl > num_ltr; }
static bool canUseForText(const CharacterType* text, unsigned length, const SimpleFontData& fontData) { // FIXME: <textarea maxlength=0> generates empty text node. if (!length) return false; for (unsigned i = 0; i < length; ++i) { UChar character = text[i]; if (character == ' ') continue; // These would be easy to support. if (character == noBreakSpace) return false; if (character == softHyphen) return false; UCharDirection direction = u_charDirection(character); if (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC || direction == U_RIGHT_TO_LEFT_EMBEDDING || direction == U_RIGHT_TO_LEFT_OVERRIDE || direction == U_LEFT_TO_RIGHT_EMBEDDING || direction == U_LEFT_TO_RIGHT_OVERRIDE || direction == U_POP_DIRECTIONAL_FORMAT || direction == U_BOUNDARY_NEUTRAL) return false; if (!fontData.glyphForCharacter(character)) return false; } return true; }
static bool isOneLeftToRightRun(const TextRun& run) { for (int i = 0; i < run.length(); i++) { UCharDirection direction = u_charDirection(run[i]); if (direction == U_RIGHT_TO_LEFT || direction > U_OTHER_NEUTRAL) return false; } return true; }
static void getDirectionalities(JNIEnv* env, jobject obj, jcharArray srcArray, jbyteArray destArray, int count) { jchar* src = env->GetCharArrayElements(srcArray, NULL); jbyte* dest = env->GetByteArrayElements(destArray, NULL); if (src == NULL || dest == NULL) { jniThrowException(env, "java/lang/NullPointerException", NULL); goto DIRECTION_END; } if (env->GetArrayLength(srcArray) < count || env->GetArrayLength(destArray) < count) { jniThrowException(env, "java/lang/ArrayIndexOutOfBoundsException", NULL); goto DIRECTION_END; } for (int i = 0; i < count; i++) { if (src[i] >= 0xD800 && src[i] <= 0xDBFF && i + 1 < count && src[i + 1] >= 0xDC00 && src[i + 1] <= 0xDFFF) { int c = 0x00010000 + ((src[i] - 0xD800) << 10) + (src[i + 1] & 0x3FF); int dir = u_charDirection(c); if (dir < 0 || dir >= U_CHAR_DIRECTION_COUNT) dir = PROPERTY_UNDEFINED; else dir = directionality_map[dir]; dest[i++] = dir; dest[i] = dir; } else { int c = src[i]; int dir = u_charDirection(c); if (dir < 0 || dir >= U_CHAR_DIRECTION_COUNT) dest[i] = PROPERTY_UNDEFINED; else dest[i] = directionality_map[dir]; } } DIRECTION_END: env->ReleaseCharArrayElements(srcArray, src, JNI_ABORT); env->ReleaseByteArrayElements(destArray, dest, JNI_ABORT); }
// Walks the test case buffer one code point at a time, calls
// u_charDirection() on each, and ORs one bit per returned class into a bit
// set. Prints an error to stderr if a non-empty buffer produced no bits.
// |pErrorCode| is not used.
virtual void call(UErrorCode* pErrorCode) {
    const UChar* const text = testcase.getBuffer();
    const int32_t textLength = testcase.getBufferLen();

    uint32_t seenClasses = 0;
    int32_t offset = 0;
    while (offset < textLength) {
        UChar32 codePoint;
        // U16_NEXT advances |offset| past the code point it decodes.
        U16_NEXT(text, offset, textLength, codePoint);
        seenClasses |= (uint32_t)1 << u_charDirection(codePoint);
    }

    if (textLength > 0 && seenClasses == 0) {
        fprintf(stderr, "error: GetBiDiClass() did not collect bits\n");
    }
}
void VTTCue::determineTextDirection() { DEPRECATED_DEFINE_STATIC_LOCAL(const String, rtTag, (ASCIILiteral("rt"))); createWebVTTNodeTree(); if (!m_webVTTNodeTree) return; // Apply the Unicode Bidirectional Algorithm's Paragraph Level steps to the // concatenation of the values of each WebVTT Text Object in nodes, in a // pre-order, depth-first traversal, excluding WebVTT Ruby Text Objects and // their descendants. StringBuilder paragraphBuilder; for (Node* node = m_webVTTNodeTree->firstChild(); node; node = NodeTraversal::next(node, m_webVTTNodeTree.get())) { // FIXME: The code does not match the comment above. This does not actually exclude Ruby Text Object descendant. if (!node->isTextNode() || node->localName() == rtTag) continue; paragraphBuilder.append(node->nodeValue()); } String paragraph = paragraphBuilder.toString(); if (!paragraph.length()) return; for (size_t i = 0; i < paragraph.length(); ++i) { UChar current = paragraph[i]; if (!current || isCueParagraphSeparator(current)) return; if (UChar current = paragraph[i]) { UCharDirection charDirection = u_charDirection(current); if (charDirection == U_LEFT_TO_RIGHT) { m_displayDirection = CSSValueLtr; return; } if (charDirection == U_RIGHT_TO_LEFT || charDirection == U_RIGHT_TO_LEFT_ARABIC) { m_displayDirection = CSSValueRtl; return; } } } }
// Increments *num_rtl and *num_ltr according to the directionality of // characters in the box. void BoxChar::GetDirection(int* num_rtl, int* num_ltr) const { // Convert the unichar to UTF32 representation std::vector<char32> uni_vector = UNICHAR::UTF8ToUTF32(ch_.c_str()); if (uni_vector.empty()) { tprintf("Illegal utf8 in boxchar string:%s = ", ch_.c_str()); for (int c = 0; c < ch_.size(); ++c) { tprintf(" 0x%x", ch_[c]); } tprintf("\n"); return; } for (char32 ch : uni_vector) { UCharDirection dir = u_charDirection(ch); if (dir == U_RIGHT_TO_LEFT || dir == U_RIGHT_TO_LEFT_ARABIC || dir == U_ARABIC_NUMBER || dir == U_RIGHT_TO_LEFT_ISOLATE) { ++*num_rtl; } else if (dir != U_DIR_NON_SPACING_MARK && dir != U_BOUNDARY_NEUTRAL) { ++*num_ltr; } } }
/*
 * Prints a few Unicode properties of codePoint to stdout: its name, general
 * category, lowercase flag and uppercase mapping, digit flag and decimal digit
 * value, and BiDi class.
 */
static void
printProps(UChar32 codePoint) {
    char buffer[100];
    UErrorCode errorCode;

    /* get the character name */
    errorCode=U_ZERO_ERROR;
    u_charName(codePoint, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
    if(U_FAILURE(errorCode)) {
        /* do not print whatever is in the buffer after a failure */
        buffer[0]=0;
    } else {
        /* guard against a name that exactly filled the buffer without a NUL */
        buffer[sizeof(buffer)-1]=0;
    }

    /*
     * print the code point and the character name
     * (UChar32 is 32 bits wide; cast explicitly to match the %lx conversion)
     */
    printf("U+%04lx\t%s\n", (unsigned long)codePoint, buffer);

    /* print some properties */
    printf(" general category (numeric enum value): %u\n", (unsigned int)u_charType(codePoint));

    /* note: these APIs do not provide the data from SpecialCasing.txt */
    printf(" is lowercase: %d uppercase: U+%04lx\n",
           (int)u_islower(codePoint), (unsigned long)u_toupper(codePoint));

    printf(" is digit: %d decimal digit value: %d\n",
           (int)u_isdigit(codePoint), (int)u_charDigitValue(codePoint));

    printf(" BiDi directional category (numeric enum value): %u\n",
           (unsigned int)u_charDirection(codePoint));
}
bool canUseFor(const RenderBlockFlow& flow) { #if !PLATFORM(MAC) // FIXME: Non-mac platforms are hitting ASSERT(run.charactersLength() >= run.length()) // https://bugs.webkit.org/show_bug.cgi?id=123338 return false; #endif if (!flow.firstChild()) return false; // This currently covers <blockflow>#text</blockflow> case. // The <blockflow><inline>#text</inline></blockflow> case is also popular and should be relatively easy to cover. if (flow.firstChild() != flow.lastChild()) return false; if (!flow.firstChild()->isText()) return false; // Supporting floats would be very beneficial. if (flow.containsFloats()) return false; if (!flow.isHorizontalWritingMode()) return false; if (flow.flowThreadState() != RenderObject::NotInsideFlowThread) return false; if (flow.hasOutline()) return false; if (flow.isRubyText() || flow.isRubyBase()) return false; // These tests only works during layout. Outside layout this function may give false positives. if (flow.view().layoutState()) { #if ENABLE(CSS_SHAPES) if (flow.view().layoutState()->shapeInsideInfo()) return false; #endif if (flow.view().layoutState()->m_columnInfo) return false; } const RenderStyle& style = *flow.style(); // It shoudn't be hard to support other alignments. if (style.textAlign() != LEFT && style.textAlign() != WEBKIT_LEFT && style.textAlign() != TASTART) return false; // Non-visible overflow should be pretty easy to support. if (style.overflowX() != OVISIBLE || style.overflowY() != OVISIBLE) return false; // Pre/no-wrap would be very helpful to support. 
if (style.whiteSpace() != NORMAL) return false; if (!style.textIndent().isZero()) return false; if (style.wordSpacing() || style.letterSpacing()) return false; if (style.textTransform() != TTNONE) return false; if (!style.isLeftToRightDirection()) return false; if (style.lineBoxContain() != RenderStyle::initialLineBoxContain()) return false; if (style.writingMode() != TopToBottomWritingMode) return false; if (style.lineBreak() != LineBreakAuto) return false; if (style.wordBreak() != NormalWordBreak) return false; if (style.unicodeBidi() != UBNormal || style.rtlOrdering() != LogicalOrder) return false; if (style.lineAlign() != LineAlignNone || style.lineSnap() != LineSnapNone) return false; if (style.hyphens() == HyphensAuto) return false; if (style.textEmphasisFill() != TextEmphasisFillFilled || style.textEmphasisMark() != TextEmphasisMarkNone) return false; if (style.textShadow()) return false; #if ENABLE(CSS_SHAPES) if (style.resolvedShapeInside()) return true; #endif if (style.textOverflow() || (flow.isAnonymousBlock() && flow.parent()->style()->textOverflow())) return false; if (style.hasPseudoStyle(FIRST_LINE) || style.hasPseudoStyle(FIRST_LETTER)) return false; if (style.hasTextCombine()) return false; if (style.overflowWrap() != NormalOverflowWrap) return false; if (style.backgroundClip() == TextFillBox) return false; if (style.borderFit() == BorderFitLines) return false; const RenderText& textRenderer = toRenderText(*flow.firstChild()); if (textRenderer.isCombineText() || textRenderer.isCounter() || textRenderer.isQuote() || textRenderer.isTextFragment() #if ENABLE(SVG) || textRenderer.isSVGInlineText() #endif ) return false; if (style.font().codePath(TextRun(textRenderer.text())) != Font::Simple) return false; auto primaryFontData = style.font().primaryFont(); unsigned length = textRenderer.textLength(); unsigned consecutiveSpaceCount = 0; for (unsigned i = 0; i < length; ++i) { // This rejects anything with more than one consecutive whitespace, except at 
the beginning or end. // This is because we don't currently do subruns within lines. Fixing this would improve coverage significantly. UChar character = textRenderer.characterAt(i); if (isWhitespace(character)) ++consecutiveSpaceCount; else { if (consecutiveSpaceCount != i && consecutiveSpaceCount > 1) return false; consecutiveSpaceCount = 0; } // These would be easy to support. if (character == noBreakSpace) return false; if (character == softHyphen) return false; static const UChar lowestRTLCharacter = 0x590; if (character >= lowestRTLCharacter) { UCharDirection direction = u_charDirection(character); if (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC || direction == U_RIGHT_TO_LEFT_EMBEDDING || direction == U_RIGHT_TO_LEFT_OVERRIDE || direction == U_LEFT_TO_RIGHT_EMBEDDING || direction == U_LEFT_TO_RIGHT_OVERRIDE) return false; } if (!primaryFontData->glyphForCharacter(character)) return false; } return true; }
/*
 * Get the directional properties for the text,
 * calculate the flags bit-set, and
 * determine the paragraph level if necessary.
 *
 * Writes one DirProp per UTF-16 code unit into pBiDi->dirPropsMemory and
 * stores the accumulated flags in pBiDi->flags. For a surrogate pair the
 * first unit gets BN and the second gets the direction of the pair.
 */
static void
getDirProps(UBiDi *pBiDi, const UChar *text) {
    DirProp *props=pBiDi->dirPropsMemory;   /* pBiDi->dirProps is const */
    const int32_t limit=pBiDi->length;
    int32_t pos=0;
    Flags seen=0;                           /* collect all directionalities in the text */
    UChar unit;
    DirProp prop;

    if(!IS_DEFAULT_LEVEL(pBiDi->paraLevel)) {
        /* the paragraph level was given explicitly */
        seen|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
    } else {
        /* determine the paragraph level (P2..P3) */
        for(;;) {
            unit=text[pos];
            if(IS_FIRST_SURROGATE(unit) && pos+1!=limit && IS_SECOND_SURROGATE(text[pos+1])) {
                /* a surrogate pair */
                props[pos]=BN;              /* first surrogate in the pair gets the BN type */
                ++pos;
                prop=u_surrogatePairDirection(unit, text[pos]);
                props[pos]=prop;
                seen|=DIRPROP_FLAG(prop)|DIRPROP_FLAG(BN);
            } else {
                /* not a surrogate pair */
                prop=u_charDirection(unit);
                props[pos]=prop;
                seen|=DIRPROP_FLAG(prop);
            }
            ++pos;

            if(prop==L) {
                pBiDi->paraLevel=0;
                break;
            }
            if(prop==R || prop==AL) {
                pBiDi->paraLevel=1;
                break;
            }
            if(pos>=limit) {
                /*
                 * see comment in ubidi.h:
                 * the DEFAULT_XXX values are designed so that
                 * their bit 0 alone yields the intended default
                 */
                pBiDi->paraLevel&=1;
                break;
            }
        }
    }

    /* get the rest of the directional properties and the flags bits */
    for(; pos<limit; ++pos) {
        unit=text[pos];
        if(IS_FIRST_SURROGATE(unit) && pos+1!=limit && IS_SECOND_SURROGATE(text[pos+1])) {
            /* a surrogate pair */
            props[pos]=BN;                  /* first surrogate in the pair gets the BN type */
            ++pos;
            prop=u_surrogatePairDirection(unit, text[pos]);
            props[pos]=prop;
            seen|=DIRPROP_FLAG(prop)|DIRPROP_FLAG(BN);
        } else {
            /* not a surrogate pair */
            props[pos]=u_charDirection(unit);
            seen|=DIRPROP_FLAG(props[pos]);
        }
    }

    /* embedding codes in the text also bring in the paragraph-level direction flag */
    if(seen&MASK_EMBEDDING) {
        seen|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
    }
    pBiDi->flags=seen;
}
// Returns the ICU direction class of the current character, or
// U_OTHER_NEUTRAL once the end has been reached.
UCharDirection direction() const
{
    if (atEnd())
        return U_OTHER_NEUTRAL;
    return u_charDirection(current());
}
bool canUseFor(const RenderBlockFlow& flow) { #if !PLATFORM(MAC) && !PLATFORM(GTK) && !PLATFORM(EFL) // FIXME: Non-mac platforms are hitting ASSERT(run.charactersLength() >= run.length()) // https://bugs.webkit.org/show_bug.cgi?id=123338 return false; #endif if (!flow.frame().settings().simpleLineLayoutEnabled()) return false; if (!flow.firstChild()) return false; // This currently covers <blockflow>#text</blockflow> case. // The <blockflow><inline>#text</inline></blockflow> case is also popular and should be relatively easy to cover. if (flow.firstChild() != flow.lastChild()) return false; if (!flow.firstChild()->isText()) return false; // Supporting floats would be very beneficial. if (flow.containsFloats()) return false; if (!flow.isHorizontalWritingMode()) return false; if (flow.flowThreadState() != RenderObject::NotInsideFlowThread) return false; if (flow.hasOutline()) return false; if (flow.isRubyText() || flow.isRubyBase()) return false; if (flow.parent()->isDeprecatedFlexibleBox()) return false; // These tests only works during layout. Outside layout this function may give false positives. if (flow.view().layoutState()) { #if ENABLE(CSS_SHAPES) if (flow.view().layoutState()->shapeInsideInfo()) return false; #endif if (flow.view().layoutState()->m_columnInfo) return false; } const RenderStyle& style = flow.style(); if (style.textDecorationsInEffect() != TextDecorationNone) return false; if (style.textAlign() == JUSTIFY) return false; // Non-visible overflow should be pretty easy to support. if (style.overflowX() != OVISIBLE || style.overflowY() != OVISIBLE) return false; // Pre/no-wrap would be very helpful to support. 
if (style.whiteSpace() != NORMAL) return false; if (!style.textIndent().isZero()) return false; if (style.wordSpacing() || style.letterSpacing()) return false; if (style.textTransform() != TTNONE) return false; if (!style.isLeftToRightDirection()) return false; if (style.lineBoxContain() != RenderStyle::initialLineBoxContain()) return false; if (style.writingMode() != TopToBottomWritingMode) return false; if (style.lineBreak() != LineBreakAuto) return false; if (style.wordBreak() != NormalWordBreak) return false; if (style.unicodeBidi() != UBNormal || style.rtlOrdering() != LogicalOrder) return false; if (style.lineAlign() != LineAlignNone || style.lineSnap() != LineSnapNone) return false; if (style.hyphens() == HyphensAuto) return false; if (style.textEmphasisFill() != TextEmphasisFillFilled || style.textEmphasisMark() != TextEmphasisMarkNone) return false; if (style.textShadow()) return false; #if ENABLE(CSS_SHAPES) if (style.resolvedShapeInside()) return true; #endif if (style.textOverflow() || (flow.isAnonymousBlock() && flow.parent()->style().textOverflow())) return false; if (style.hasPseudoStyle(FIRST_LINE) || style.hasPseudoStyle(FIRST_LETTER)) return false; if (style.hasTextCombine()) return false; if (style.overflowWrap() != NormalOverflowWrap) return false; if (style.backgroundClip() == TextFillBox) return false; if (style.borderFit() == BorderFitLines) return false; const RenderText& textRenderer = toRenderText(*flow.firstChild()); if (textRenderer.isCombineText() || textRenderer.isCounter() || textRenderer.isQuote() || textRenderer.isTextFragment() #if ENABLE(SVG) || textRenderer.isSVGInlineText() #endif ) return false; if (style.font().codePath(TextRun(textRenderer.text())) != Font::Simple) return false; // We assume that all lines have metrics based purely on the primary font. 
auto& primaryFontData = *style.font().primaryFont(); if (primaryFontData.isLoading()) return false; unsigned length = textRenderer.textLength(); for (unsigned i = 0; i < length; ++i) { UChar character = textRenderer.characterAt(i); if (character == ' ') continue; // These would be easy to support. if (character == noBreakSpace) return false; if (character == softHyphen) return false; UCharDirection direction = u_charDirection(character); if (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC || direction == U_RIGHT_TO_LEFT_EMBEDDING || direction == U_RIGHT_TO_LEFT_OVERRIDE || direction == U_LEFT_TO_RIGHT_EMBEDDING || direction == U_LEFT_TO_RIGHT_OVERRIDE || direction == U_POP_DIRECTIONAL_FORMAT || direction == U_BOUNDARY_NEUTRAL) return false; if (!primaryFontData.glyphForCharacter(character)) return false; } return true; }
/* Returns 1 if ICU classifies code point c as U_LEFT_TO_RIGHT, 0 otherwise. */
static int utf8_codepoint_is_left_to_right(UChar32 c)
{
    const UCharDirection direction = u_charDirection(c);

    return direction == U_LEFT_TO_RIGHT ? 1 : 0;
}
// Returns ICU's UCharDirection value for |codePoint|, narrowed to jbyte.
// The value is passed through as-is; no remapping happens in this function.
static jbyte Character_getDirectionalityImpl(JNIEnv*, jclass, jint codePoint) {
    const UCharDirection icuDirection = u_charDirection(codePoint);
    return static_cast<jbyte>(icuDirection);
}
U_CAPI int32_t U_EXPORT2 u_getIntPropertyValue(UChar32 c, UProperty which) { UErrorCode errorCode; if(which<UCHAR_BINARY_START) { return 0; /* undefined */ } else if(which<UCHAR_BINARY_LIMIT) { return (int32_t)u_hasBinaryProperty(c, which); } else if(which<UCHAR_INT_START) { return 0; /* undefined */ } else if(which<UCHAR_INT_LIMIT) { switch(which) { case UCHAR_BIDI_CLASS: return (int32_t)u_charDirection(c); case UCHAR_BLOCK: return (int32_t)ublock_getCode(c); #if !UCONFIG_NO_NORMALIZATION case UCHAR_CANONICAL_COMBINING_CLASS: return u_getCombiningClass(c); #endif case UCHAR_DECOMPOSITION_TYPE: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_DT_MASK); case UCHAR_EAST_ASIAN_WIDTH: return (int32_t)(u_getUnicodeProperties(c, 0)&UPROPS_EA_MASK)>>UPROPS_EA_SHIFT; case UCHAR_GENERAL_CATEGORY: return (int32_t)u_charType(c); case UCHAR_JOINING_GROUP: return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c); case UCHAR_JOINING_TYPE: return ubidi_getJoiningType(GET_BIDI_PROPS(), c); case UCHAR_LINE_BREAK: return (int32_t)(u_getUnicodeProperties(c, UPROPS_LB_VWORD)&UPROPS_LB_MASK)>>UPROPS_LB_SHIFT; case UCHAR_NUMERIC_TYPE: { int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getUnicodeProperties(c, -1)); return UPROPS_NTV_GET_TYPE(ntv); } case UCHAR_SCRIPT: errorCode=U_ZERO_ERROR; return (int32_t)uscript_getScript(c, &errorCode); case UCHAR_HANGUL_SYLLABLE_TYPE: { /* see comments on gcbToHst[] above */ int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; if(gcb<LENGTHOF(gcbToHst)) { return gcbToHst[gcb]; } else { return U_HST_NOT_APPLICABLE; } } #if !UCONFIG_NO_NORMALIZATION case UCHAR_NFD_QUICK_CHECK: case UCHAR_NFKD_QUICK_CHECK: case UCHAR_NFC_QUICK_CHECK: case UCHAR_NFKC_QUICK_CHECK: return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD)); case UCHAR_LEAD_CANONICAL_COMBINING_CLASS: return getFCD16(c)>>8; case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS: return getFCD16(c)&0xff; #endif case 
UCHAR_GRAPHEME_CLUSTER_BREAK: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; case UCHAR_SENTENCE_BREAK: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_SB_MASK)>>UPROPS_SB_SHIFT; case UCHAR_WORD_BREAK: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_WB_MASK)>>UPROPS_WB_SHIFT; default: return 0; /* undefined */ } } else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
//static jbyte Character_getDirectionalityImpl(JNIEnv*, jclass, jint codePoint) { JNIEXPORT jbyte JNICALL Java_java_lang_Character_getDirectionalityImpl(JNIEnv*, jclass, jint codePoint) { return u_charDirection(codePoint); }
/* Thin wrapper that forwards to ICU's u_charDirection() and returns its result unchanged. */
UCharDirection __hs_u_charDirection(UChar32 c)
{
    UCharDirection direction;

    direction = u_charDirection(c);
    return direction;
}
// Maps |src| into a temporary buffer, rejects prohibited code points, applies
// the bidirectional checks, and copies the result to |dest| when it fits.
//
// Returns 0 when |status| is already a failure on entry, when the arguments
// are invalid (U_ILLEGAL_ARGUMENT_ERROR), or when the final bidi check fails
// (U_IDNA_CHECK_BIDI_ERROR). On every other path it returns the value of
// u_terminateUChars(dest, destCapacity, b1Len, &status).
// Other failures are reported through |status|: U_MEMORY_ALLOCATION_ERROR,
// U_IDNA_PROHIBITED_ERROR, U_IDNA_CHECK_BIDI_ERROR, or whatever map() sets.
int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
                                    UChar* dest, int32_t destCapacity,
                                    UBool allowUnassigned,
                                    UParseError* parseError,
                                    UErrorCode& status ){
    // check error status
    if(U_FAILURE(status)){
        return 0;
    }

    //check arguments
    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // The mapped text lives on the stack unless it outgrows MAX_BUFFER_SIZE.
    UChar b1Stack[MAX_BUFFER_SIZE];
    UChar *b1 = b1Stack;
    int32_t b1Len,b1Capacity = MAX_BUFFER_SIZE;

    int32_t b1Index = 0;
    // |direction| ends up holding the class of the last code point and
    // |firstCharDir| the class of the first; both are needed by check 3.
    UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    UBool leftToRight=FALSE, rightToLeft=FALSE;

    b1Len = map(src,srcLength, b1, b1Capacity,allowUnassigned,parseError, status);

    if(status == U_BUFFER_OVERFLOW_ERROR){
        // redo processing of string
        /* we do not have enough room so grow the buffer*/
        if(!u_growBufferFromStatic(b1Stack,&b1,&b1Capacity,b1Len,0)){
            status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        status = U_ZERO_ERROR; // reset error

        b1Len = map(src,srcLength, b1, b1Len,allowUnassigned, parseError, status);
    }

    if(U_FAILURE(status)){
        goto CLEANUP;
    }

    for(; b1Index<b1Len; ){
        UChar32 ch = 0;

        U16_NEXT(b1, b1Index, b1Len, ch);

        // U+0020 is let through even if it is in the prohibited set.
        if(prohibited.contains(ch) && ch!=0x0020){
            status = U_IDNA_PROHIBITED_ERROR;
            goto CLEANUP;
        }

        direction = u_charDirection(ch);
        if(firstCharDir==U_CHAR_DIRECTION_COUNT){
            firstCharDir = direction;
        }
        if(direction == U_LEFT_TO_RIGHT){
            leftToRight = TRUE;
        }
        if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
            rightToLeft = TRUE;
        }
    }

    // satisfy 2
    if( leftToRight == TRUE && rightToLeft == TRUE){
        status = U_IDNA_CHECK_BIDI_ERROR;
        goto CLEANUP;
    }

    //satisfy 3
    if( rightToLeft == TRUE &&
        !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
          (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
       ){
        status = U_IDNA_CHECK_BIDI_ERROR;
        // This path keeps its historical return value of 0, but it must not
        // skip releasing a heap-grown buffer on the way out.
        if(b1!=b1Stack){
            uprv_free(b1);
        }
        return 0;
    }

    if(b1Len <= destCapacity){
        uprv_memmove(dest,b1, b1Len*U_SIZEOF_UCHAR);
    }

CLEANUP:
    if(b1!=b1Stack){
        uprv_free(b1);
    }

    return u_terminateUChars(dest, destCapacity, b1Len, &status);
}
// Returns ICU's UCharDirection value for |codePoint|, narrowed to jbyte,
// with no remapping.
jbyte fastiva_vm_Character_C$__getDirectionalityImpl(jint codePoint) {
    const UCharDirection icuDirection = u_charDirection(codePoint);
    return static_cast<jbyte>(icuDirection);
}
// Helper sets the character attribute properties and sets up the script table. // Does not set tops and bottoms. void SetupBasicProperties(bool report_errors, bool decompose, UNICHARSET* unicharset) { for (int unichar_id = 0; unichar_id < unicharset->size(); ++unichar_id) { // Convert any custom ligatures. const char* unichar_str = unicharset->id_to_unichar(unichar_id); for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr; ++i) { if (!strcmp(UNICHARSET::kCustomLigatures[i][1], unichar_str)) { unichar_str = UNICHARSET::kCustomLigatures[i][0]; break; } } // Convert the unichar to UTF32 representation std::vector<char32> uni_vector = UNICHAR::UTF8ToUTF32(unichar_str); // Assume that if the property is true for any character in the string, // then it holds for the whole "character". bool unichar_isalpha = false; bool unichar_islower = false; bool unichar_isupper = false; bool unichar_isdigit = false; bool unichar_ispunct = false; for (char32 u_ch : uni_vector) { if (u_isalpha(u_ch)) unichar_isalpha = true; if (u_islower(u_ch)) unichar_islower = true; if (u_isupper(u_ch)) unichar_isupper = true; if (u_isdigit(u_ch)) unichar_isdigit = true; if (u_ispunct(u_ch)) unichar_ispunct = true; } unicharset->set_isalpha(unichar_id, unichar_isalpha); unicharset->set_islower(unichar_id, unichar_islower); unicharset->set_isupper(unichar_id, unichar_isupper); unicharset->set_isdigit(unichar_id, unichar_isdigit); unicharset->set_ispunctuation(unichar_id, unichar_ispunct); tesseract::IcuErrorCode err; unicharset->set_script(unichar_id, uscript_getName( uscript_getScript(uni_vector[0], err))); const int num_code_points = uni_vector.size(); // Obtain the lower/upper case if needed and record it in the properties. unicharset->set_other_case(unichar_id, unichar_id); if (unichar_islower || unichar_isupper) { std::vector<char32> other_case(num_code_points, 0); for (int i = 0; i < num_code_points; ++i) { // TODO(daria): Ideally u_strToLower()/ustrToUpper() should be used. 
// However since they deal with UChars (so need a conversion function // from char32 or UTF8string) and require a meaningful locale string, // for now u_tolower()/u_toupper() are used. other_case[i] = unichar_islower ? u_toupper(uni_vector[i]) : u_tolower(uni_vector[i]); } std::string other_case_uch = UNICHAR::UTF32ToUTF8(other_case); UNICHAR_ID other_case_id = unicharset->unichar_to_id(other_case_uch.c_str()); if (other_case_id != INVALID_UNICHAR_ID) { unicharset->set_other_case(unichar_id, other_case_id); } else if (unichar_id >= SPECIAL_UNICHAR_CODES_COUNT && report_errors) { tprintf("Other case %s of %s is not in unicharset\n", other_case_uch.c_str(), unichar_str); } } // Set RTL property and obtain mirror unichar ID from ICU. std::vector<char32> mirrors(num_code_points, 0); for (int i = 0; i < num_code_points; ++i) { mirrors[i] = u_charMirror(uni_vector[i]); if (i == 0) { // set directionality to that of the 1st code point unicharset->set_direction(unichar_id, static_cast<UNICHARSET::Direction>( u_charDirection(uni_vector[i]))); } } std::string mirror_uch = UNICHAR::UTF32ToUTF8(mirrors); UNICHAR_ID mirror_uch_id = unicharset->unichar_to_id(mirror_uch.c_str()); if (mirror_uch_id != INVALID_UNICHAR_ID) { unicharset->set_mirror(unichar_id, mirror_uch_id); } else if (report_errors) { tprintf("Mirror %s of %s is not in unicharset\n", mirror_uch.c_str(), unichar_str); } // Record normalized version of this unichar. std::string normed_str; if (unichar_id != 0 && tesseract::NormalizeUTF8String( decompose ? tesseract::UnicodeNormMode::kNFKD : tesseract::UnicodeNormMode::kNFKC, tesseract::OCRNorm::kNormalize, tesseract::GraphemeNorm::kNone, unichar_str, &normed_str) && !normed_str.empty()) { unicharset->set_normed(unichar_id, normed_str.c_str()); } else { unicharset->set_normed(unichar_id, unichar_str); } ASSERT_HOST(unicharset->get_other_case(unichar_id) < unicharset->size()); } unicharset->post_load_setup(); }
/*
 * JNI entry point: returns ICU's direction class for the UTF-16 code unit c
 * as a jint. env and cls are not used.
 */
JNIEXPORT jint JNICALL Java_java_text_Bidi_nativeGetDirectionCode
  (JNIEnv *env, jclass cls, jchar c)
{
  UChar codeUnit = (UChar)c;
  UCharDirection direction = u_charDirection(codeUnit);

  return (jint)direction;
}
// Records per-code-point details of |us| through |insert| when the string
// contains anything "unusual".
//
// Nothing is recorded when |text| consists only of ASCII alphanumerics and the
// characters '_', ':', ' ', '.', '-'. Otherwise |us| is scanned, and it counts
// as unusual if any code point is non-printable, is not left-to-right, or has
// a general category other than upper/lowercase letter, decimal digit, space
// separator, or dash/connector/other punctuation. For an unusual string, one
// row per code point is bound and executed on |insert|: text, index, the
// code point as UTF-8, "U+xxxx", block, category name, direction, and name.
void get_unicode_info(const char* text, const icu::UnicodeString& us, Sqlite::Statement& insert) {
    // Fast path: plain identifiers need no Unicode inspection.
    bool allokay = true;
    for (const char* t = text; *t; ++t) {
        // std::isalnum requires a value representable as unsigned char;
        // passing a negative char (any non-ASCII UTF-8 byte) is undefined.
        const unsigned char byte = static_cast<unsigned char>(*t);
        if (!(std::isalnum(byte) || *t == '_' || *t == ':' || *t == ' ' || *t == '.' || *t == '-')) {
            allokay = false;
            break;
        }
    }

    if (allokay) {
        return;
    }

    // next32() advances by a whole code point so that a supplementary
    // character (a surrogate pair in UTF-16) is visited once, not twice.
    bool unusual = false;
    for (icu::StringCharacterIterator it(us); it.hasNext(); it.next32()) {
        UChar32 codepoint = it.current32();
        int8_t chartype = u_charType(codepoint);
        if (!u_isprint(codepoint)) {
            unusual = true;
            break;
        }
        if (u_charDirection(codepoint) != U_LEFT_TO_RIGHT) {
            unusual = true;
            break;
        }
        if (chartype != U_UPPERCASE_LETTER &&
            chartype != U_LOWERCASE_LETTER &&
            chartype != U_DECIMAL_DIGIT_NUMBER &&
            chartype != U_SPACE_SEPARATOR &&
            chartype != U_DASH_PUNCTUATION &&
            chartype != U_CONNECTOR_PUNCTUATION &&
            chartype != U_OTHER_PUNCTUATION) {
            unusual = true;
            break;
        }
    }

    if (unusual) {
        int num = 0;
        for (icu::StringCharacterIterator it(us); it.hasNext(); it.next32(), ++num) {
            UChar32 codepoint = it.current32();
            int8_t chartype = u_charType(codepoint);

            char buffer[100];
            UErrorCode errorCode = U_ZERO_ERROR;
            u_charName(codepoint, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
            UCharDirection direction = u_charDirection(codepoint);
            int32_t block = u_getIntPropertyValue(codepoint, UCHAR_BLOCK);

            icu::UnicodeString ustr(codepoint);
            std::string str;
            ustr.toUTF8String(str);

            // "U+10ffff" plus the terminator fits in 10 bytes.
            char uplus[10];
            snprintf(uplus, sizeof(uplus), "U+%04x", static_cast<unsigned>(codepoint));

            insert.
                bind_text(text).
                bind_int(num).
                bind_text(str.c_str()).
                bind_text(uplus).
                bind_int(block).
                bind_text(category_to_string(chartype)).
                bind_int(direction).
                bind_text(buffer).
                execute();
        }
    }
}