static jboolean mirror(JNIEnv* env, jobject obj, jcharArray charArray, int start, int count) { jchar* data = env->GetCharArrayElements(charArray, NULL); bool ret = false; if (data == NULL) { jniThrowException(env, "java/lang/NullPointerException", NULL); goto MIRROR_END; } if (start < 0 || start > start + count || env->GetArrayLength(charArray) < start + count) { jniThrowException(env, "java/lang/ArrayIndexOutOfBoundsException", NULL); goto MIRROR_END; } for (int i = start; i < start + count; i++) { // XXX this thinks it knows that surrogates are never mirrored int c1 = data[i]; int c2 = u_charMirror(c1); if (c1 != c2) { data[i] = c2; ret = true; } } MIRROR_END: env->ReleaseCharArrayElements(charArray, data, JNI_ABORT); return ret; }
String createStringWithMirroredCharacters(StringView string) { unsigned length = string.length(); StringBuilder mirroredCharacters; mirroredCharacters.reserveCapacity(length); for (unsigned i = 0; i < length; ) { UChar32 character; U16_NEXT(string, i, length, character); mirroredCharacters.append(u_charMirror(character)); } return mirroredCharacters.toString(); }
void mirrorCharacters(UChar* destination, const UChar* source, int length) const { int position = 0; bool error = false; // Iterate characters in source and mirror character if needed. while (position < length) { UChar32 character; int nextPosition = position; U16_NEXT(source, nextPosition, length, character); character = u_charMirror(character); U16_APPEND(destination, position, length, character, error); ASSERT(!error); position = nextPosition; } }
void ComplexTextController::normalizeSpacesAndMirrorChars(const UChar* source, bool rtl, UChar* destination, int length) { int position = 0; bool error = false; // Iterate characters in source and mirror character if needed. while (position < length) { UChar32 character; int nextPosition = position; U16_NEXT(source, nextPosition, length, character); if (Font::treatAsSpace(character)) character = ' '; else if (Font::treatAsZeroWidthSpace(character)) character = zeroWidthSpace; else if (rtl) character = u_charMirror(character); U16_APPEND(destination, position, length, character, error); ASSERT(!error); position = nextPosition; } }
static void normalizeSpacesAndMirrorChars(const UChar* source, UChar* destination, int length, HarfBuzzShaperBase::NormalizeMode normalizeMode) { int position = 0; bool error = false; // Iterate characters in source and mirror character if needed. while (position < length) { UChar32 character; int nextPosition = position; U16_NEXT(source, nextPosition, length, character); if (Font::treatAsSpace(character)) character = ' '; else if (Font::treatAsZeroWidthSpace(character)) character = zeroWidthSpace; else if (normalizeMode == HarfBuzzShaperBase::NormalizeMirrorChars) character = u_charMirror(character); U16_APPEND(destination, position, length, character, error); ASSERT_UNUSED(error, !error); position = nextPosition; } }
/** * Performs character mirroring. * * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. * @param pErrorCode Pointer to the error code value. * * @return Whether or not this function modifies the text. Besides the return * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. */ static UBool action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode) { UChar32 c; uint32_t i = 0, j = 0; if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) { return FALSE; } if (pTransform->destSize < pTransform->srcLength) { *pErrorCode = U_BUFFER_OVERFLOW_ERROR; return FALSE; } do { UBool isOdd = ubidi_getLevelAt(pTransform->pBidi, i) & 1; U16_NEXT(pTransform->src, i, pTransform->srcLength, c); U16_APPEND_UNSAFE(pTransform->dest, j, isOdd ? u_charMirror(c) : c); } while (i < pTransform->srcLength); *pTransform->pDestLength = pTransform->srcLength; pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; return TRUE; }
std::pair<GlyphData, GlyphPage*> FontGlyphs::glyphDataAndPageForCharacter(const FontDescription& description, UChar32 c, bool mirror, FontDataVariant variant) const { ASSERT(isMainThread()); if (variant == AutoVariant) { if (description.smallCaps() && !primarySimpleFontData(description)->isSVGFont()) { UChar32 upperC = u_toupper(c); if (upperC != c) { c = upperC; variant = SmallCapsVariant; } else variant = NormalVariant; } else variant = NormalVariant; } if (mirror) c = u_charMirror(c); unsigned pageNumber = (c / GlyphPage::size); GlyphPageTreeNode* node = pageNumber ? m_pages.get(pageNumber) : m_pageZero; if (!node) { node = GlyphPageTreeNode::getRootChild(realizeFontDataAt(description, 0), pageNumber); if (pageNumber) m_pages.set(pageNumber, node); else m_pageZero = node; } GlyphPage* page = 0; if (variant == NormalVariant) { // Fastest loop, for the common case (normal variant). while (true) { page = node->page(); if (page) { GlyphData data = page->glyphDataForCharacter(c); if (data.fontData && (data.fontData->platformData().orientation() == Horizontal || data.fontData->isTextOrientationFallback())) return std::make_pair(data, page); if (data.fontData) { if (Font::isCJKIdeographOrSymbol(c)) { if (!data.fontData->hasVerticalGlyphs()) { // Use the broken ideograph font data. The broken ideograph font will use the horizontal width of glyphs // to make sure you get a square (even for broken glyphs like symbols used for punctuation). variant = BrokenIdeographVariant; break; } #if PLATFORM(COCOA) else if (data.fontData->platformData().syntheticOblique()) return glyphDataAndPageForCJKCharacterWithoutSyntheticItalic(c, data, page, pageNumber); #endif } else return glyphDataAndPageForNonCJKCharacterWithGlyphOrientation(c, description.nonCJKGlyphOrientation(), data, page, pageNumber); return std::make_pair(data, page); } if (node->isSystemFallback()) break; } node = node->getChild(realizeFontDataAt(description, node->level()), pageNumber); if (pageNumber) m_pages.set(pageNumber, node); else m_pageZero = node; } } if (variant != NormalVariant) { while (true) { page = node->page(); if (page) { GlyphData data = page->glyphDataForCharacter(c); if (data.fontData) { // The variantFontData function should not normally return 0. // But if it does, we will just render the capital letter big. RefPtr<SimpleFontData> variantFontData = data.fontData->variantFontData(description, variant); if (!variantFontData) return std::make_pair(data, page); GlyphPageTreeNode* variantNode = GlyphPageTreeNode::getRootChild(variantFontData.get(), pageNumber); GlyphPage* variantPage = variantNode->page(); if (variantPage) { GlyphData data = variantPage->glyphDataForCharacter(c); if (data.fontData) return std::make_pair(data, variantPage); } // Do not attempt system fallback off the variantFontData. This is the very unlikely case that // a font has the lowercase character but the small caps font does not have its uppercase version. return std::make_pair(variantFontData->missingGlyphData(), page); } if (node->isSystemFallback()) break; } node = node->getChild(realizeFontDataAt(description, node->level()), pageNumber); if (pageNumber) m_pages.set(pageNumber, node); else m_pageZero = node; } } ASSERT(page); ASSERT(node->isSystemFallback()); // System fallback is character-dependent. When we get here, we // know that the character in question isn't in the system fallback // font's glyph page. Try to lazily create it here. UChar codeUnits[2]; int codeUnitsLength; if (c <= 0xFFFF) { codeUnits[0] = Font::normalizeSpaces(c); codeUnitsLength = 1; } else { codeUnits[0] = U16_LEAD(c); codeUnits[1] = U16_TRAIL(c); codeUnitsLength = 2; } const SimpleFontData* originalFontData = primaryFontData(description)->fontDataForCharacter(c); RefPtr<SimpleFontData> characterFontData = fontCache().systemFallbackForCharacters(description, originalFontData, m_isForPlatformFont, codeUnits, codeUnitsLength); if (characterFontData) { if (characterFontData->platformData().orientation() == Vertical && !characterFontData->hasVerticalGlyphs() && Font::isCJKIdeographOrSymbol(c)) variant = BrokenIdeographVariant; if (variant != NormalVariant) characterFontData = characterFontData->variantFontData(description, variant); } if (characterFontData) { // Got the fallback glyph and font. GlyphPage* fallbackPage = GlyphPageTreeNode::getRootChild(characterFontData.get(), pageNumber)->page(); GlyphData data = fallbackPage && fallbackPage->fontDataForCharacter(c) ? fallbackPage->glyphDataForCharacter(c) : characterFontData->missingGlyphData(); // Cache it so we don't have to do system fallback again next time. if (variant == NormalVariant) { #if OS(WINCE) // missingGlyphData returns a null character, which is not suitable for GDI to display. // Also, sometimes we cannot map a font for the character on WINCE, but GDI can still // display the character, probably because the font package is not installed correctly. // So we just always set the glyph to be same as the character, and let GDI solve it. page->setGlyphDataForCharacter(c, c, characterFontData.get()); characterFontData->setMaxGlyphPageTreeLevel(std::max(characterFontData->maxGlyphPageTreeLevel(), node->level())); return std::make_pair(page->glyphDataForCharacter(c), page); #else page->setGlyphDataForCharacter(c, data.glyph, data.fontData); data.fontData->setMaxGlyphPageTreeLevel(std::max(data.fontData->maxGlyphPageTreeLevel(), node->level())); if (!Font::isCJKIdeographOrSymbol(c) && data.fontData->platformData().orientation() != Horizontal && !data.fontData->isTextOrientationFallback()) return glyphDataAndPageForNonCJKCharacterWithGlyphOrientation(c, description.nonCJKGlyphOrientation(), data, fallbackPage, pageNumber); #endif } return std::make_pair(data, page); } // Even system fallback can fail; use the missing glyph in that case. // FIXME: It would be nicer to use the missing glyph from the last resort font instead. GlyphData data = primarySimpleFontData(description)->missingGlyphData(); if (variant == NormalVariant) { #if OS(WINCE) // See comment about WINCE GDI handling near setGlyphDataForCharacter above. page->setGlyphDataForCharacter(c, c, data.fontData); data.fontData->setMaxGlyphPageTreeLevel(std::max(data.fontData->maxGlyphPageTreeLevel(), node->level())); return std::make_pair(page->glyphDataForCharacter(c), page); #else page->setGlyphDataForCharacter(c, data.glyph, data.fontData); data.fontData->setMaxGlyphPageTreeLevel(std::max(data.fontData->maxGlyphPageTreeLevel(), node->level())); #endif } return std::make_pair(data, page); }
/* * When we have UBIDI_OUTPUT_REVERSE set on ubidi_writeReordered(), then we * semantically write RTL runs in reverse and later reverse them again. * Instead, we actually write them in forward order to begin with. * However, if the RTL run was to be mirrored, we need to mirror here now * since the implicit second reversal must not do it. * It looks strange to do mirroring in LTR output, but it is only because * we are writing RTL output in reverse. */ static int32_t doWriteForward(const UChar *src, int32_t srcLength, UChar *dest, int32_t destSize, uint16_t options, UErrorCode *pErrorCode) { /* optimize for several combinations of options */ switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING)) { case 0: { /* simply copy the LTR run to the destination */ int32_t length=srcLength; if(destSize<length) { *pErrorCode=U_BUFFER_OVERFLOW_ERROR; return srcLength; } do { *dest++=*src++; } while(--length>0); return srcLength; } case UBIDI_DO_MIRRORING: { /* do mirroring */ int32_t i=0, j=0; UChar32 c; if(destSize<srcLength) { *pErrorCode=U_BUFFER_OVERFLOW_ERROR; return srcLength; } do { UTF_NEXT_CHAR(src, i, srcLength, c); c=u_charMirror(c); UTF_APPEND_CHAR_UNSAFE(dest, j, c); } while(i<srcLength); return srcLength; } case UBIDI_REMOVE_BIDI_CONTROLS: { /* copy the LTR run and remove any BiDi control characters */ int32_t remaining=destSize; UChar c; do { c=*src++; if(!IS_BIDI_CONTROL_CHAR(c)) { if(--remaining<0) { *pErrorCode=U_BUFFER_OVERFLOW_ERROR; /* preflight the length */ while(--srcLength>0) { c=*src++; if(!IS_BIDI_CONTROL_CHAR(c)) { --remaining; } } return destSize-remaining; } *dest++=c; } } while(--srcLength>0); return destSize-remaining; } default: { /* remove BiDi control characters and do mirroring */ int32_t remaining=destSize; int32_t i, j=0; UChar32 c; do { i=0; UTF_NEXT_CHAR(src, i, srcLength, c); src+=i; srcLength-=i; if(!IS_BIDI_CONTROL_CHAR(c)) { remaining-=i; if(remaining<0) { *pErrorCode=U_BUFFER_OVERFLOW_ERROR; /* preflight the length */ while(srcLength>0) { c=*src++; if(!IS_BIDI_CONTROL_CHAR(c)) { --remaining; } --srcLength; } return destSize-remaining; } c=u_charMirror(c); UTF_APPEND_CHAR_UNSAFE(dest, j, c); } } while(srcLength>0); return j; } } /* end of switch */ }
static int32_t doWriteReverse(const UChar *src, int32_t srcLength, UChar *dest, int32_t destSize, uint16_t options, UErrorCode *pErrorCode) { /* * RTL run - * * RTL runs need to be copied to the destination in reverse order * of code points, not code units, to keep Unicode characters intact. * * The general strategy for this is to read the source text * in backward order, collect all code units for a code point * (and optionally following combining characters, see below), * and copy all these code units in ascending order * to the destination for this run. * * Several options request whether combining characters * should be kept after their base characters, * whether BiDi control characters should be removed, and * whether characters should be replaced by their mirror-image * equivalent Unicode characters. */ int32_t i, j; UChar32 c; /* optimize for several combinations of options */ switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING)) { case 0: /* * With none of the "complicated" options set, the destination * run will have the same length as the source run, * and there is no mirroring and no keeping combining characters * with their base characters. */ if(destSize<srcLength) { *pErrorCode=U_BUFFER_OVERFLOW_ERROR; return srcLength; } destSize=srcLength; /* preserve character integrity */ do { /* i is always after the last code unit known to need to be kept in this segment */ i=srcLength; /* collect code units for one base character */ UTF_BACK_1(src, 0, srcLength); /* copy this base character */ j=srcLength; do { *dest++=src[j++]; } while(j<i); } while(srcLength>0); break; case UBIDI_KEEP_BASE_COMBINING: /* * Here, too, the destination * run will have the same length as the source run, * and there is no mirroring. * We do need to keep combining characters with their base characters. */ if(destSize<srcLength) { *pErrorCode=U_BUFFER_OVERFLOW_ERROR; return srcLength; } destSize=srcLength; /* preserve character integrity */ do { /* i is always after the last code unit known to need to be kept in this segment */ i=srcLength; /* collect code units and modifier letters for one base character */ do { UTF_PREV_CHAR(src, 0, srcLength, c); } while(srcLength>0 && IS_COMBINING(u_charType(c))); /* copy this "user character" */ j=srcLength; do { *dest++=src[j++]; } while(j<i); } while(srcLength>0); break; default: /* * With several "complicated" options set, this is the most * general and the slowest copying of an RTL run. * We will do mirroring, remove BiDi controls, and * keep combining characters with their base characters * as requested. */ if(!(options&UBIDI_REMOVE_BIDI_CONTROLS)) { i=srcLength; } else { /* we need to find out the destination length of the run, which will not include the BiDi control characters */ int32_t length=srcLength; UChar ch; i=0; do { ch=*src++; if(!IS_BIDI_CONTROL_CHAR(ch)) { ++i; } } while(--length>0); src-=srcLength; } if(destSize<i) { *pErrorCode=U_BUFFER_OVERFLOW_ERROR; return i; } destSize=i; /* preserve character integrity */ do { /* i is always after the last code unit known to need to be kept in this segment */ i=srcLength; /* collect code units for one base character */ UTF_PREV_CHAR(src, 0, srcLength, c); if(options&UBIDI_KEEP_BASE_COMBINING) { /* collect modifier letters for this base character */ while(srcLength>0 && IS_COMBINING(u_charType(c))) { UTF_PREV_CHAR(src, 0, srcLength, c); } } if(options&UBIDI_REMOVE_BIDI_CONTROLS && IS_BIDI_CONTROL_CHAR(c)) { /* do not copy this BiDi control character */ continue; } /* copy this "user character" */ j=srcLength; if(options&UBIDI_DO_MIRRORING) { /* mirror only the base character */ int32_t k=0; c=u_charMirror(c); UTF_APPEND_CHAR_UNSAFE(dest, k, c); dest+=k; j+=k; } while(j<i) { *dest++=src[j++]; } } while(srcLength>0); break; } /* end of switch */ return destSize; }
static jchar getMirror(JNIEnv* env, jobject obj, jchar c) { return u_charMirror(c); }
HB_BEGIN_DECLS static hb_codepoint_t hb_icu_get_mirroring (hb_codepoint_t unicode) { return u_charMirror(unicode); }
// Helper sets the character attribute properties and sets up the script table. // Does not set tops and bottoms. void SetupBasicProperties(bool report_errors, bool decompose, UNICHARSET* unicharset) { for (int unichar_id = 0; unichar_id < unicharset->size(); ++unichar_id) { // Convert any custom ligatures. const char* unichar_str = unicharset->id_to_unichar(unichar_id); for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr; ++i) { if (!strcmp(UNICHARSET::kCustomLigatures[i][1], unichar_str)) { unichar_str = UNICHARSET::kCustomLigatures[i][0]; break; } } // Convert the unichar to UTF32 representation std::vector<char32> uni_vector = UNICHAR::UTF8ToUTF32(unichar_str); // Assume that if the property is true for any character in the string, // then it holds for the whole "character". bool unichar_isalpha = false; bool unichar_islower = false; bool unichar_isupper = false; bool unichar_isdigit = false; bool unichar_ispunct = false; for (char32 u_ch : uni_vector) { if (u_isalpha(u_ch)) unichar_isalpha = true; if (u_islower(u_ch)) unichar_islower = true; if (u_isupper(u_ch)) unichar_isupper = true; if (u_isdigit(u_ch)) unichar_isdigit = true; if (u_ispunct(u_ch)) unichar_ispunct = true; } unicharset->set_isalpha(unichar_id, unichar_isalpha); unicharset->set_islower(unichar_id, unichar_islower); unicharset->set_isupper(unichar_id, unichar_isupper); unicharset->set_isdigit(unichar_id, unichar_isdigit); unicharset->set_ispunctuation(unichar_id, unichar_ispunct); tesseract::IcuErrorCode err; unicharset->set_script(unichar_id, uscript_getName( uscript_getScript(uni_vector[0], err))); const int num_code_points = uni_vector.size(); // Obtain the lower/upper case if needed and record it in the properties. unicharset->set_other_case(unichar_id, unichar_id); if (unichar_islower || unichar_isupper) { std::vector<char32> other_case(num_code_points, 0); for (int i = 0; i < num_code_points; ++i) { // TODO(daria): Ideally u_strToLower()/ustrToUpper() should be used. // However since they deal with UChars (so need a conversion function // from char32 or UTF8string) and require a meaningful locale string, // for now u_tolower()/u_toupper() are used. other_case[i] = unichar_islower ? u_toupper(uni_vector[i]) : u_tolower(uni_vector[i]); } std::string other_case_uch = UNICHAR::UTF32ToUTF8(other_case); UNICHAR_ID other_case_id = unicharset->unichar_to_id(other_case_uch.c_str()); if (other_case_id != INVALID_UNICHAR_ID) { unicharset->set_other_case(unichar_id, other_case_id); } else if (unichar_id >= SPECIAL_UNICHAR_CODES_COUNT && report_errors) { tprintf("Other case %s of %s is not in unicharset\n", other_case_uch.c_str(), unichar_str); } } // Set RTL property and obtain mirror unichar ID from ICU. std::vector<char32> mirrors(num_code_points, 0); for (int i = 0; i < num_code_points; ++i) { mirrors[i] = u_charMirror(uni_vector[i]); if (i == 0) { // set directionality to that of the 1st code point unicharset->set_direction(unichar_id, static_cast<UNICHARSET::Direction>( u_charDirection(uni_vector[i]))); } } std::string mirror_uch = UNICHAR::UTF32ToUTF8(mirrors); UNICHAR_ID mirror_uch_id = unicharset->unichar_to_id(mirror_uch.c_str()); if (mirror_uch_id != INVALID_UNICHAR_ID) { unicharset->set_mirror(unichar_id, mirror_uch_id); } else if (report_errors) { tprintf("Mirror %s of %s is not in unicharset\n", mirror_uch.c_str(), unichar_str); } // Record normalized version of this unichar. std::string normed_str; if (unichar_id != 0 && tesseract::NormalizeUTF8String( decompose ? tesseract::UnicodeNormMode::kNFKD : tesseract::UnicodeNormMode::kNFKC, tesseract::OCRNorm::kNormalize, tesseract::GraphemeNorm::kNone, unichar_str, &normed_str) && !normed_str.empty()) { unicharset->set_normed(unichar_id, normed_str.c_str()); } else { unicharset->set_normed(unichar_id, unichar_str); } ASSERT_HOST(unicharset->get_other_case(unichar_id) < unicharset->size()); } unicharset->post_load_setup(); }
UChar32 __hs_u_charMirror(UChar32 c) { return u_charMirror(c); }