// Runs the ICU bidi algorithm over the first n chars of chsArray and writes
// the resolved embedding level of every char into infoArray.
//
// Returns the resolved paragraph level. Returns 0 when either Java array could
// not be pinned/copied, or when ICU reports an error (in that case a
// java/lang/RuntimeException is raised as well).
//
// obj and haveInfo are not used by this function.
static jint runBidi(JNIEnv* env, jobject obj, jint dir, jcharArray chsArray,
                    jbyteArray infoArray, int n, jboolean haveInfo)
{
    // Arguments are validated on the Java side. A NULL result from
    // GetXXXArrayElements signals a serious out-of-memory condition which is
    // deliberately not reported: the process is most likely doomed anyway.
    jchar* chars = env->GetCharArrayElements(chsArray, NULL);
    if (chars == NULL) {
        return 0;
    }

    jbyte* levels = env->GetByteArrayElements(infoArray, NULL);
    if (levels == NULL) {
        env->ReleaseCharArrayElements(chsArray, chars, JNI_ABORT);
        return 0;
    }

    jint paraLevel = 0;
    UErrorCode icuStatus = U_ZERO_ERROR;
    UBiDi* para = ubidi_openSized(n, 0, &icuStatus);
    ubidi_setPara(para, chars, n, dir, NULL, &icuStatus);
    if (U_FAILURE(icuStatus)) {
        jniThrowException(env, "java/lang/RuntimeException", NULL);
    } else {
        for (int pos = 0; pos < n; ++pos) {
            levels[pos] = ubidi_getLevelAt(para, pos);
        }
        paraLevel = ubidi_getParaLevel(para);
    }
    ubidi_close(para);

    // Mode 0 copies the levels back to the Java array; the chars were only
    // read, so they are released without copy-back.
    env->ReleaseByteArrayElements(infoArray, levels, 0);
    env->ReleaseCharArrayElements(chsArray, chars, JNI_ABORT);
    return paraLevel;
}
// Splits m_text into directional runs using the ICU bidi algorithm.
//
// The base direction comes from the paragraph style at m_firstChar. The
// returned runs are in visual order; each carries its logical start, its
// length and its direction. The list is empty when ICU reports an error.
QList<TextShaper::TextRun> TextShaper::itemizeBiDi()
{
	QList<TextRun> runs;
	UBiDi *bidi = ubidi_open();
	UErrorCode status = U_ZERO_ERROR;

	ParagraphStyle paraStyle = m_story.paragraphStyle(m_firstChar);
	UBiDiLevel baseLevel = UBIDI_LTR;
	if (paraStyle.direction() == ParagraphStyle::RTL)
		baseLevel = UBIDI_RTL;

	ubidi_setPara(bidi, (const UChar*) m_text.utf16(), m_text.length(), baseLevel, NULL, &status);

	// Only ask for the run count when the paragraph was set up successfully.
	int32_t runCount = 0;
	if (U_SUCCESS(status))
		runCount = ubidi_countRuns(bidi, &status);

	if (U_SUCCESS(status))
	{
		runs.reserve(runCount);
		for (int32_t run = 0; run < runCount; ++run)
		{
			int32_t logicalStart;
			int32_t runLength;
			UBiDiDirection runDir = ubidi_getVisualRun(bidi, run, &logicalStart, &runLength);
			runs.append(TextRun(logicalStart, runLength, runDir));
		}
	}

	ubidi_close(bidi);
	return runs;
}
/**
 * Performs Bidi resolution of text.
 *
 * Hands the transform's source text (<code>src</code>, <code>srcLength</code>)
 * to <code>ubidi_setPara()</code> on the transform's <code>pBidi</code> object,
 * using the base level of the currently active reordering scheme and no
 * explicit embedding levels.
 *
 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
 * @param pErrorCode Pointer to the error code value; it is passed straight
 *        through to <code>ubidi_setPara()</code>.
 *
 * @return Whether or not this function modifies the text. This action always
 *         returns <code>FALSE</code>. Besides the return value, the caller
 *         should also check <code>U_SUCCESS(*pErrorCode)</code>.
 */
static UBool
action_resolve(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
    ubidi_setPara(pTransform->pBidi, pTransform->src, pTransform->srcLength,
            pTransform->pActiveScheme->baseLevel, NULL, pErrorCode);
    return FALSE;
}
// Runs the ICU bidi algorithm on aText (aLength UTF-16 code units) with the
// given paragraph level, storing the result in mBiDi. No explicit embedding
// levels are supplied. The ICU status is translated to an nsresult.
nsresult nsBidi::SetPara(const char16_t *aText, int32_t aLength,
                         nsBidiLevel aParaLevel)
{
  const UChar* icuText = reinterpret_cast<const UChar*>(aText);

  UErrorCode status = U_ZERO_ERROR;
  ubidi_setPara(mBiDi, icuText, aLength, aParaLevel, nullptr, &status);

  return ICUUtils::UErrorToNsResult(status);
}
std::vector<std::u16string> BiDi::processText(const std::u16string& input, std::set<std::size_t> lineBreakPoints) { UErrorCode errorCode = U_ZERO_ERROR; ubidi_setPara(impl->bidiText, mbgl::utf16char_cast<const UChar*>(input.c_str()), static_cast<int32_t>(input.size()), UBIDI_DEFAULT_LTR, nullptr, &errorCode); if (U_FAILURE(errorCode)) { throw std::runtime_error(std::string("BiDi::processText: ") + u_errorName(errorCode)); } return applyLineBreaking(lineBreakPoints); }
/**
 * Converts a string from logical to visual order.
 *
 * The text is reordered with the ICU bidi algorithm (base direction
 * auto-detected, defaulting to RTL), mirrored characters are swapped, bidi
 * control characters are removed, and Arabic letters are shaped.
 *
 * @param input Text in logical order.
 * @return The converted text, or an unmodified copy of @p input when any ICU
 *         call fails (the ICU error code is logged in that case).
 */
OSMAND_CORE_API QString OSMAND_CORE_CALL OsmAnd::ICU::convertToVisualOrder(const QString& input)
{
    QString output;
    const auto len = input.length();
    UErrorCode icuError = U_ZERO_ERROR;
    bool ok = true;

    // Allocate ICU BiDi context
    const auto pContext = ubidi_openSized(len, 0, &icuError);
    if(pContext == nullptr || !U_SUCCESS(icuError))
    {
        LogPrintf(LogSeverityLevel::Error, "ICU error: %d", icuError);
        ubidi_close(pContext); // accepts nullptr; guards against a leak
        return input;
    }

    // Configure context to reorder from logical to visual
    ubidi_setReorderingMode(pContext, UBIDI_REORDER_DEFAULT);

    // Set data
    ubidi_setPara(pContext, reinterpret_cast<const UChar*>(input.unicode()), len, UBIDI_DEFAULT_RTL, nullptr, &icuError);
    ok = U_SUCCESS(icuError);

    if(ok)
    {
        QVector<UChar> reordered(len);
        // UBIDI_REMOVE_BIDI_CONTROLS may make the output shorter than the
        // input, so the length actually written must be used from here on;
        // otherwise the unused tail of the buffer ends up in the result.
        const auto reorderedLen = ubidi_writeReordered(
            pContext,
            reordered.data(),
            len,
            UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS,
            &icuError);
        ok = U_SUCCESS(icuError);

        if(ok)
        {
            QVector<UChar> reshaped(len);
            const auto newLen = u_shapeArabic(
                reordered.constData(),
                reorderedLen,
                reshaped.data(),
                len,
                U_SHAPE_TEXT_DIRECTION_VISUAL_LTR | U_SHAPE_LETTERS_SHAPE | U_SHAPE_LENGTH_FIXED_SPACES_AT_END,
                &icuError);
            ok = U_SUCCESS(icuError);

            if(ok)
                output = QString(reinterpret_cast<const QChar*>(reshaped.constData()), newLen);
        }
    }

    // Release context
    ubidi_close(pContext);

    if(!ok)
    {
        LogPrintf(LogSeverityLevel::Error, "ICU error: %d", icuError);
        return input;
    }
    return output;
}
// Splits pText (UTF-8) into directional runs with ICU and forwards every run,
// in visual order, to UpdateTextCache_Font together with a flag telling
// whether the run is right-to-left.
//
// The base direction is auto-detected by ICU; the fallback used when the text
// gives no hint follows the writing direction of the current localization.
// On ICU failure a debug message is written and no run is forwarded.
void CTextRenderer::UpdateTextCache_BiDi(array<CHarfbuzzGlyph>* pGlyphChain, const char* pText)
{
	//Use ICU for bidirectional text
	//note: bidirectional texts appear for example when a latin username is displayed in a arabic text
	UErrorCode ICUError = U_ZERO_ERROR;
	UnicodeString UTF16Text = icu::UnicodeString::fromUTF8(pText);
	UBiDi* pICUBiDi = ubidi_openSized(UTF16Text.length(), 0, &ICUError);

	//Perform the BiDi algorithm
	ubidi_setPara(pICUBiDi, UTF16Text.getBuffer(), UTF16Text.length(), (Localization()->GetWritingDirection() == CLocalization::DIRECTION_RTL ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR), 0, &ICUError);

	if(U_FAILURE(ICUError))
	{
		dbg_msg("TextRenderer", "BiDi algorithm failed: %s", u_errorName(ICUError));
		ubidi_close(pICUBiDi); //ubidi_close accepts a null pointer
		return;
	}

	UBiDiDirection Direction = ubidi_getDirection(pICUBiDi);
	if(Direction != UBIDI_MIXED)
	{
		//Unidirectional text: a single run covers everything
		UpdateTextCache_Font(pGlyphChain, UTF16Text.getBuffer(), 0, UTF16Text.length(), (Direction == UBIDI_RTL));
	}
	else
	{
		int NumberOfParts = ubidi_countRuns(pICUBiDi, &ICUError);
		if(U_SUCCESS(ICUError))
		{
			for(int i=0; i<NumberOfParts; i++)
			{
				int Start;
				int SubLength;
				Direction = ubidi_getVisualRun(pICUBiDi, i, &Start, &SubLength);
				UpdateTextCache_Font(pGlyphChain, UTF16Text.getBuffer(), Start, SubLength, (Direction == UBIDI_RTL));
			}
		}
		else
		{
			dbg_msg("TextRenderer", "BiDi algorithm failed (ubidi_countRuns): %s", u_errorName(ICUError));
		}
	}

	//The BiDi object was previously leaked on every path
	ubidi_close(pICUBiDi);
}
void font_face_set::get_string_info(string_info & info, UnicodeString const& ustr, char_properties *format) { double avg_height = character_dimensions('X').height(); UErrorCode err = U_ZERO_ERROR; UnicodeString reordered; UnicodeString shaped; int32_t length = ustr.length(); UBiDi *bidi = ubidi_openSized(length, 0, &err); ubidi_setPara(bidi, ustr.getBuffer(), length, UBIDI_DEFAULT_LTR, 0, &err); ubidi_writeReordered(bidi, reordered.getBuffer(length), length, UBIDI_DO_MIRRORING, &err); reordered.releaseBuffer(length); u_shapeArabic(reordered.getBuffer(), length, shaped.getBuffer(length), length, U_SHAPE_LETTERS_SHAPE | U_SHAPE_LENGTH_FIXED_SPACES_NEAR | U_SHAPE_TEXT_DIRECTION_VISUAL_LTR, &err); shaped.releaseBuffer(length); if (U_SUCCESS(err)) { StringCharacterIterator iter(shaped); for (iter.setToStart(); iter.hasNext();) { UChar ch = iter.nextPostInc(); char_info char_dim = character_dimensions(ch); char_dim.format = format; char_dim.avg_height = avg_height; info.add_info(char_dim); } } #if (U_ICU_VERSION_MAJOR_NUM*100 + U_ICU_VERSION_MINOR_NUM >= 406) if (ubidi_getBaseDirection(ustr.getBuffer(), length) == UBIDI_RTL) { info.set_rtl(true); } #endif ubidi_close(bidi); }
// JNI entry point for java.text.Bidi.ubidi_setPara.
//
// Stores a native copy of the optional embedding levels in the BiDiData
// referenced by `ptr`, then runs ubidi_setPara on `length` chars of `text`
// with the given paragraph level. An ICU failure is reported through
// maybeThrowIcuException. Returns early, without calling ICU, when the Java
// char array cannot be accessed.
extern "C" void Java_java_text_Bidi_ubidi_1setPara(JNIEnv* env, jclass, jlong ptr,
                                                    jcharArray text, jint length,
                                                    jint paraLevel,
                                                    jbyteArray newEmbeddingLevels) {
    BiDiData* bidiData = biDiData(ptr);

    // The embedding levels are copied from the Java heap to the native heap.
    if (newEmbeddingLevels == NULL) {
        bidiData->setEmbeddingLevels(NULL);
    } else {
        jbyte* nativeLevels = new jbyte[length];
        bidiData->setEmbeddingLevels(nativeLevels);
        env->GetByteArrayRegion(newEmbeddingLevels, 0, length, nativeLevels);
    }

    ScopedCharArrayRO textChars(env, text);
    if (textChars.get() == NULL) {
        return;
    }

    UErrorCode status = U_ZERO_ERROR;
    ubidi_setPara(bidiData->uBiDi(), textChars.get(), length, paraLevel,
                  bidiData->embeddingLevels(), &status);
    maybeThrowIcuException(env, "ubidi_setPara", status);
}
void text_itemizer::itemize_direction(unsigned start, unsigned end) { direction_runs_.clear(); UErrorCode error = U_ZERO_ERROR; int32_t length = end - start; UBiDi *bidi = ubidi_openSized(length, 0, &error); if (!bidi || U_FAILURE(error)) { MAPNIK_LOG_ERROR(text_itemizer) << "Failed to create bidi object: " << u_errorName(error) << "\n"; return; } ubidi_setPara(bidi, text_.getBuffer() + start, length, UBIDI_DEFAULT_LTR, 0, &error); if (U_SUCCESS(error)) { UBiDiDirection direction = ubidi_getDirection(bidi); if (direction != UBIDI_MIXED) { direction_runs_.emplace_back(direction, start, end); } else { // mixed-directional int32_t count = ubidi_countRuns(bidi, &error); if(U_SUCCESS(error)) { for(int i=0; i<count; ++i) { int32_t vis_length; int32_t run_start; direction = ubidi_getVisualRun(bidi, i, &run_start, &vis_length); run_start += start; //Add offset to compensate offset in setPara direction_runs_.emplace_back(direction, run_start, run_start+vis_length); } } } } else { MAPNIK_LOG_ERROR(text_itemizer) << "ICU error: " << u_errorName(error) << "\n"; //TODO: Exception } ubidi_close(bidi); }
// Builds the runs of a text group.
//
// Scripts whose horizontal direction is LTR get one LTR run for the whole
// input. Otherwise the input is converted to UTF-16 and analysed with the ICU
// bidi algorithm, using the overall direction as paragraph level:
// unidirectional text becomes a single run, mixed text one run per ICU visual
// run.
TextGroup::TextGroup(const std::string &input, hb_script_t script,
                     const std::string &lang, hb_direction_t overallDirection)
    : script_(script)
    , lang_(lang)
    , overallDirection_(overallDirection)
{
    if (hb_script_get_horizontal_direction(script_) == HB_DIRECTION_LTR) {
        addRun(input, HB_DIRECTION_LTR);
        return;
    }

    auto utf16 = UnicodeString::fromUTF8(input);
    auto utf16Length = utf16.length();
    printf("Hominlinx-->TextGroup::TextGroup str unicodelen[%d] ====text[0x%x]\n",utf16Length, utf16.charAt(0) );

    UErrorCode status = U_ZERO_ERROR;
    UBiDi *bidi = ubidi_openSized(utf16Length, 0, &status); // bidirectional text
    ubidi_setPara(bidi, utf16.getBuffer(), utf16Length,
                  hbDirectionToUBIDILevel(overallDirection_), 0, &status);

    auto paraDirection = ubidi_getDirection(bidi);
    if (paraDirection == UBIDI_MIXED) {
        auto runCount = ubidi_countRuns(bidi, &status);
        for (int run = 0; run < runCount; ++run) {
            int32_t runStart, runLength;
            auto runDirection = ubidi_getVisualRun(bidi, run, &runStart, &runLength);
            addRun(utf16, runDirection, runStart, runStart + runLength);
        }
    } else {
        addRun(input, uciDirectionToHB(paraDirection));
    }

    ubidi_close(bidi);
}
/*
 * Native backend of java.text.Bidi: runs the ICU bidi algorithm on
 * `length` chars of `text` starting at `tStart` and reports the result to the
 * Java object through resetBidi().
 *
 *   embs/eStart - optional explicit embedding levels (may be NULL) and the
 *                 offset of the first level to use.
 *   dir         - paragraph level passed to ubidi_setPara.
 *
 * For mixed-direction text two int arrays are built:
 *   - resRuns: (limit, level) pairs, one pair per logical run;
 *   - resCWS:  indices of characters whose direction property is WS and that
 *              lie in runs whose level parity differs from the paragraph
 *              level.
 * Both stay 0 for unidirectional text or when an allocation fails.
 * resetBidi() is not called when opening the UBiDi object, pinning the text,
 * or ubidi_setPara fails.
 */
JNIEXPORT void JNICALL Java_java_text_Bidi_nativeBidiChars
    (JNIEnv *env, jclass cls, jobject jbidi, jcharArray text, jint tStart, jbyteArray embs, jint eStart, jint length, jint dir)
{
    UErrorCode err = U_ZERO_ERROR;
    UBiDi* bidi = ubidi_openSized(length, length, &err);
    if (!U_FAILURE(err)) {
        jchar *cText = (jchar*)(*env)->GetPrimitiveArrayCritical(env, text, NULL);
        if (cText) {
            UBiDiLevel baseLevel = (UBiDiLevel)dir;
            jbyte *cEmbs = 0;
            uint8_t *cEmbsAdj = 0;
            if (embs != NULL) {
                cEmbs = (jbyte*)(*env)->GetPrimitiveArrayCritical(env, embs, NULL);
                if (cEmbs) {
                    /* skip to the first embedding level that applies */
                    cEmbsAdj = (uint8_t*)(cEmbs + eStart);
                }
            }
            ubidi_setPara(bidi, cText + tStart, length, baseLevel, cEmbsAdj, &err);
            /* the Java arrays were only read: release without copy-back */
            if (cEmbs) {
                (*env)->ReleasePrimitiveArrayCritical(env, embs, cEmbs, JNI_ABORT);
            }
            (*env)->ReleasePrimitiveArrayCritical(env, text, cText, JNI_ABORT);

            if (!U_FAILURE(err)) {
                jint resDir = (jint)ubidi_getDirection(bidi);
                jint resLevel = (jint)ubidi_getParaLevel(bidi);
                jint resRunCount = 0;
                jintArray resRuns = 0;
                jintArray resCWS = 0;
                if (resDir == UBIDI_MIXED) {
                    resRunCount = (jint)ubidi_countRuns(bidi, &err);
                    if (!U_FAILURE(err)) {
                        if (resRunCount) {
                            /* two ints per run: limit, level */
                            jint* cResRuns = (jint*)calloc(resRunCount * 2, sizeof(jint));
                            if (cResRuns) {
                                UTextOffset limit = 0;
                                UBiDiLevel level;
                                jint *p = cResRuns;
                                /* walk the logical runs front to back */
                                while (limit < length) {
                                    ubidi_getLogicalRun(bidi, limit, &limit, &level);
                                    *p++ = (jint)limit;
                                    *p++ = (jint)level;
                                }
                                {
                                    /* reads the per-character direction properties
                                       directly from the UBiDi structure */
                                    const DirProp *dp = bidi->dirProps;
                                    jint ccws = 0;
                                    jint n = 0;
                                    p = cResRuns;
                                    /* first pass: count WS characters in runs whose
                                       level parity differs from the paragraph level */
                                    do {
                                        if ((*(p+1) ^ resLevel) & 0x1) {
                                            while (n < *p) {
                                                if (dp[n++] == WS) {
                                                    ++ccws;
                                                }
                                            }
                                        } else {
                                            /* same parity: skip the whole run */
                                            n = *p;
                                        }
                                        p += 2;
                                    } while (n < length);

                                    resCWS = (*env)->NewIntArray(env, ccws);
                                    if (resCWS) {
                                        jint* cResCWS = (jint*)(*env)->GetPrimitiveArrayCritical(env, resCWS, NULL);
                                        if (cResCWS) {
                                            /* second pass: same walk, this time
                                               recording the indices */
                                            jint ccws = 0;
                                            jint n = 0;
                                            p = cResRuns;
                                            do {
                                                if ((*(p+1) ^ resLevel) & 0x1) {
                                                    while (n < *p) {
                                                        if (dp[n] == WS) {
                                                            cResCWS[ccws++] = n;
                                                        }
                                                        ++n;
                                                    }
                                                } else {
                                                    n = *p;
                                                }
                                                p += 2;
                                            } while (n < length);
                                            /* mode 0: copy the indices back */
                                            (*env)->ReleasePrimitiveArrayCritical(env, resCWS, cResCWS, 0);
                                        }
                                    }
                                }

                                resRuns = (*env)->NewIntArray(env, resRunCount * 2);
                                if (resRuns) {
                                    (*env)->SetIntArrayRegion(env, resRuns, 0, resRunCount * 2, cResRuns);
                                }
                                free(cResRuns);
                            }
                        }
                    }
                }

                resetBidi(env, cls, jbidi, resDir, resLevel, length, resRuns, resCWS);
            }
        }
        ubidi_close(bidi);
    }
}
/*
 * Lua: icu_bidi_runs(text, direction) -> run1, run2, ...
 *
 * Splits the UTF-8 string `text` into bidi runs in visual order. `direction`
 * selects the paragraph level: a value starting with "RTL" (case-insensitive)
 * means right-to-left, anything else left-to-right.
 *
 * One table is returned per run, with the fields
 *   run    - the run's text as UTF-8
 *   start  - logical start of the run, counted in Unicode characters
 *   length - length of the run, counted in Unicode characters
 *   dir    - "RTL" or "LTR"
 *   level  - embedding level of the first code unit of the run
 *
 * Raises a Lua error on ICU or allocation failure. All native resources are
 * released before the error is raised, because luaL_error does not return.
 */
int icu_bidi_runs(lua_State *L) {
  size_t input_l;
  const char* input = luaL_checklstring(L, 1, &input_l);
  const char* direction = luaL_checkstring(L, 2);

  UChar *input_as_uchar;
  int32_t l;
  utf8_to_uchar(input, input_l, input_as_uchar, l);

  UBiDiLevel paraLevel = 0;
  if (strncasecmp(direction, "RTL", 3) == 0) {
    paraLevel = 1;
  }
  /* Now let's bidi! */
  UBiDi* bidi = ubidi_open();
  UErrorCode err = U_ZERO_ERROR;
  ubidi_setPara(bidi, input_as_uchar, l, paraLevel, NULL, &err);
  if (!U_SUCCESS(err)) {
    free(input_as_uchar);
    ubidi_close(bidi);
    return luaL_error(L, "Error in bidi %s", u_errorName(err));
  }

  int count = ubidi_countRuns(bidi,&err);
  if (!U_SUCCESS(err)) {
    free(input_as_uchar);
    ubidi_close(bidi);
    return luaL_error(L, "Error in bidi %s", u_errorName(err));
  }

  /* One slot per result table, plus two for the key/value pair that sits on
     top of the last table while it is being filled in. */
  if (!lua_checkstack(L, count + 2)) {
    free(input_as_uchar);
    ubidi_close(bidi);
    return luaL_error(L, "Couldn't malloc");
  }

  int start, length;
  for (int i=0; i < count; i++) {
    UBiDiDirection dir = ubidi_getVisualRun(bidi, i, &start, &length);
    lua_newtable(L);

    /* Convert back to UTF8. A UTF-16 code unit needs at most 3 UTF-8 bytes;
       the extra byte leaves room for the terminator even for an empty run. */
    int32_t l3 = 0;
    char* possibleOutbuf = (char*)malloc(4*length + 1);
    if(!possibleOutbuf) {
      free(input_as_uchar);
      ubidi_close(bidi);
      return luaL_error(L, "Couldn't malloc");
    }
    u_strToUTF8(possibleOutbuf, 4 * length + 1, &l3, input_as_uchar+start, length, &err);
    if (!U_SUCCESS(err)) {
      free(possibleOutbuf);
      free(input_as_uchar);
      ubidi_close(bidi);
      return luaL_error(L, "Bidi run too big? %s", u_errorName(err));
    }
    lua_pushstring(L, "run");
    /* Push with an explicit length: does not depend on NUL termination. */
    lua_pushlstring(L, possibleOutbuf, (size_t)l3);
    free(possibleOutbuf);
    lua_settable(L, -3);

    /* Length/start is given in terms of UTF16 code units.
       But we want a count of Unicode characters. This means
       surrogate pairs need to be counted as 1. */
    lua_pushstring(L, "start");
    int32_t new_start = start;
    for (int j=0; j< start; j++) {
      if (U_IS_TRAIL(*(input_as_uchar+j))) new_start--;
    }
    lua_pushinteger(L, new_start);
    lua_settable(L, -3);

    lua_pushstring(L, "length");
    /* Count into a separate variable: decrementing `length` itself would
       shrink the loop bound and skip the trailing code units of the run. */
    int32_t new_length = length;
    for (int j=start; j< start+length; j++) {
      if (U_IS_TRAIL(*(input_as_uchar+j))) new_length--;
    }
    lua_pushinteger(L, new_length);
    lua_settable(L, -3);

    lua_pushstring(L, "dir");
    lua_pushstring(L, dir == UBIDI_RTL ? "RTL" : "LTR");
    lua_settable(L, -3);

    lua_pushstring(L, "level");
    lua_pushinteger(L, ubidi_getLevelAt(bidi, start));
    lua_settable(L, -3);
  }

  free(input_as_uchar);
  ubidi_close(bidi);
  return count;
}
void BiDiConformanceTest::TestBidiTest() { if(isICUVersionBefore(52, 1)) { // TODO: Update the ICU BiDi code to implement the additions in the Unicode 6.3 BiDi Algorithm, // and reenable the BiDi conformance test. return; } IcuTestErrorCode errorCode(*this, "TestBidiTest"); const char *sourceTestDataPath=getSourceTestData(errorCode); if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata " "folder (getSourceTestData())")) { return; } char bidiTestPath[400]; strcpy(bidiTestPath, sourceTestDataPath); strcat(bidiTestPath, "BidiTest.txt"); LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); if(bidiTestFile.isNull()) { errln("unable to open %s", bidiTestPath); return; } LocalUBiDiPointer ubidi(ubidi_open()); ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL, NULL, NULL, errorCode); if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) { return; } lineNumber=0; levelsCount=0; orderingCount=0; errorCount=0; while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) { ++lineNumber; // Remove trailing comments and whitespace. char *commentStart=strchr(line, '#'); if(commentStart!=NULL) { *commentStart=0; } u_rtrim(line); const char *start=u_skipWhitespace(line); if(*start==0) { continue; // Skip empty and comment-only lines. } if(*start=='@') { ++start; if(0==strncmp(start, "Levels:", 7)) { if(!parseLevels(start+7)) { return; } } else if(0==strncmp(start, "Reorder:", 8)) { if(!parseOrdering(start+8)) { return; } } // Skip unknown @Xyz: ... } else { if(!parseInputStringFromBiDiClasses(start)) { return; } start=u_skipWhitespace(start); if(*start!=';') { errln("missing ; separator on input line %s", line); return; } start=u_skipWhitespace(start+1); char *end; uint32_t bitset=(uint32_t)strtoul(start, &end, 16); if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) { errln("input bitset parse error at %s", start); return; } // Loop over the bitset. 
static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL }; static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" }; for(int i=0; i<=3; ++i) { if(bitset&(1<<i)) { ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(), paraLevels[i], NULL, errorCode); const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode); if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) { errln("Input line %d: %s", (int)lineNumber, line); return; } if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()), paraLevelNames[i])) { // continue outerLoop; does not exist in C++ // so just break out of the inner loop. break; } if(!checkOrdering(ubidi.getAlias(), paraLevelNames[i])) { // continue outerLoop; does not exist in C++ // so just break out of the inner loop. break; } } } } } }
// Measures the string held by `info`.
//
// The string is split into bidi visual runs. Characters of LTR runs are added
// to `info` in logical order. Other runs are reversed first; when the first
// character of the reversed run lies in U+0600..U+06FF the run also goes
// through Arabic shaping (and contributes nothing if shaping fails).
// Finally the summed width and the maximum height are stored in `info`.
void font_face_set::get_string_info(string_info & info)
{
    unsigned total_width = 0;
    unsigned max_height = 0;
    UErrorCode status = U_ZERO_ERROR;
    UnicodeString const& source = info.get_string();
    const UChar * chars = source.getBuffer();

    UBiDi * bidi = ubidi_openSized(source.length(),0,&status);
    if (U_FAILURE(status))
    {
        info.set_dimensions(total_width, max_height);
        return;
    }

    ubidi_setPara(bidi,chars,source.length(), UBIDI_DEFAULT_LTR,0,&status);

    // No runs are visited when the paragraph could not be set up.
    int32_t run_count = 0;
    if (U_SUCCESS(status))
    {
        run_count = ubidi_countRuns(bidi,&status);
    }

    int32_t run_start;
    int32_t run_length;
    for (int32_t run = 0; run < run_count; ++run)
    {
        UBiDiDirection run_dir = ubidi_getVisualRun(bidi,run,&run_start,&run_length);
        if (run_dir == UBIDI_LTR)
        {
            do {
                UChar unit = chars[run_start++];
                dimension_t dims = character_dimensions(unit);
                info.add_info(unit, dims.width, dims.height);
                total_width += dims.width;
                if (dims.height > max_height) max_height = dims.height;
            } while (--run_length > 0);
            continue;
        }

        // Copy the run back to front.
        run_start += run_length;
        int32_t out_pos = 0;
        int32_t remaining = run_length;
        UnicodeString reversed;
        UChar * out = reversed.getBuffer(run_length);
        do {
            out[out_pos++] = chars[--run_start];
        } while (--remaining > 0);
        reversed.releaseBuffer(run_length);

        UChar const first_unit = *reversed.getBuffer();
        bool const looks_arabic = (first_unit >= 0x0600 && first_unit <= 0x06ff);
        if (!looks_arabic)
        {
            // Non-Arabic RTL
            for (int k = 0; k < reversed.length(); ++k)
            {
                dimension_t dims = character_dimensions(reversed[k]);
                info.add_info(reversed[k], dims.width, dims.height);
                total_width += dims.width;
                if (dims.height > max_height) max_height = dims.height;
            }
            continue;
        }

        UnicodeString shaped;
        u_shapeArabic(reversed.getBuffer(),reversed.length(),
                      shaped.getBuffer(reversed.length()),reversed.length(),
                      U_SHAPE_LETTERS_SHAPE|U_SHAPE_LENGTH_FIXED_SPACES_NEAR|
                      U_SHAPE_TEXT_DIRECTION_VISUAL_LTR
                      ,&status);
        shaped.releaseBuffer(reversed.length());
        if (U_SUCCESS(status))
        {
            for (int k = 0; k < shaped.length(); ++k)
            {
                dimension_t dims = character_dimensions(shaped[k]);
                info.add_info(shaped[k], dims.width, dims.height);
                total_width += dims.width;
                if (dims.height > max_height) max_height = dims.height;
            }
        }
    }

    ubidi_close(bidi);
    info.set_dimensions(total_width, max_height);
}
/* ******************************************************************************* * * created on: 2013jul01 * created by: Matitiahu Allouche This function performs a conformance test for implementations of the Unicode Bidirectional Algorithm, specified in UAX #9: Unicode Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/ Each test case is represented in a single line which is read from a file named BidiCharacter.txt. Empty, blank and comment lines may also appear in this file. The format of the test data is specified below. Note that each test case constitutes a single line of text; reordering is applied within a single line and independently of a rendering engine, and rules L3 and L4 are out of scope. The number sign '#' is the comment character: everything is ignored from the occurrence of '#' until the end of the line, Empty lines and lines containing only spaces and/or comments are ignored. Lines which represent test cases consist of 4 or 5 fields separated by a semicolon. Each field consists of tokens separated by whitespace (space or Tab). Whitespace before and after semicolons is optional. Field 0: A sequence of hexadecimal code point values separated by space Field 1: A value representing the paragraph direction, as follows: - 0 represents left-to-right - 1 represents right-to-left - 2 represents auto-LTR according to rules P2 and P3 of the algorithm - 3 represents auto-RTL according to rules P2 and P3 of the algorithm - a negative number whose absolute value is taken as paragraph level; this may be useful to test cases where the embedding level approaches or exceeds the maximum embedding level. Field 2: The resolved paragraph embedding level. If the input (field 0) includes more than one paragraph, this field represents the resolved level of the first paragraph. Field 3: An ordered list of resulting levels for each token in field 0 (each token represents one source character). The UBA does not assign levels to certain characters (e.g. 
LRO); characters removed in rule X9 are indicated with an 'x'. Field 4: An ordered list of indices showing the resulting visual ordering from left to right; characters with a resolved level of 'x' are skipped. The number are zero-based. Each index corresponds to a character in the reordered (visual) string. It represents the index of the source character in the input (field 0). This field is optional. When it is absent, the visual ordering is not verified. Examples: # This is a comment line. L L ON R ; 0 ; 0 ; 0 0 0 1 ; 0 1 2 3 L L ON R;0;0;0 0 0 1;0 1 2 3 # Note: in the next line, 'B' represents a block separator, not the letter 'B'. LRE A B C PDF;2;0;x 2 0 0 x;1 2 3 # Note: in the next line, 'b' represents the letter 'b', not a block separator. a b c 05d0 05d1 x ; 0 ; 0 ; 0 0 0 1 1 0 ; 0 1 2 4 3 5 a R R x ; 1 ; 1 ; 2 1 1 2 L L R R R B R R L L L B ON ON ; 3 ; 0 ; 0 0 1 1 1 0 1 1 2 2 2 1 1 1 * ******************************************************************************* */ void BiDiConformanceTest::TestBidiCharacterTest() { IcuTestErrorCode errorCode(*this, "TestBidiCharacterTest"); const char *sourceTestDataPath=getSourceTestData(errorCode); if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata " "folder (getSourceTestData())")) { return; } char bidiTestPath[400]; strcpy(bidiTestPath, sourceTestDataPath); strcat(bidiTestPath, "BidiCharacterTest.txt"); LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); if(bidiTestFile.isNull()) { errln("unable to open %s", bidiTestPath); return; } LocalUBiDiPointer ubidi(ubidi_open()); lineNumber=0; levelsCount=0; orderingCount=0; errorCount=0; while(errorCount<20 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) { ++lineNumber; paraLevelName="N/A"; inputString="N/A"; // Remove trailing comments and whitespace. 
char *commentStart=strchr(line, '#'); if(commentStart!=NULL) { *commentStart=0; } u_rtrim(line); const char *start=u_skipWhitespace(line); if(*start==0) { continue; // Skip empty and comment-only lines. } // Parse the code point string in field 0. UChar *buffer=inputString.getBuffer(200); int32_t length=u_parseString(start, buffer, inputString.getCapacity(), NULL, errorCode); if(errorCode.logIfFailureAndReset("Invalid string in field 0")) { errln("Input line %d: %s", (int)lineNumber, line); inputString.remove(); continue; } inputString.releaseBuffer(length); start=strchr(start, ';'); if(start==NULL) { errorCount++; errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line); continue; } start=u_skipWhitespace(start+1); char *end; int32_t paraDirection=(int32_t)strtol(start, &end, 10); UBiDiLevel paraLevel=UBIDI_MAX_EXPLICIT_LEVEL+2; if(paraDirection==0) { paraLevel=0; paraLevelName="LTR"; } else if(paraDirection==1) { paraLevel=1; paraLevelName="RTL"; } else if(paraDirection==2) { paraLevel=UBIDI_DEFAULT_LTR; paraLevelName="Auto/LTR"; } else if(paraDirection==3) { paraLevel=UBIDI_DEFAULT_RTL; paraLevelName="Auto/RTL"; } else if(paraDirection<0 && -paraDirection<=(UBIDI_MAX_EXPLICIT_LEVEL+1)) { paraLevel=(UBiDiLevel)(-paraDirection); sprintf(levelNameString, "%d", (int)paraLevel); paraLevelName=levelNameString; } if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || paraLevel==(UBIDI_MAX_EXPLICIT_LEVEL+2)) { errln("\nError on line %d: Input paragraph direction incorrect at %s", (int)lineNumber, start); printErrorLine(); continue; } start=u_skipWhitespace(end); if(*start!=';') { errorCount++; errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line); continue; } start++; uint32_t resolvedParaLevel=(uint32_t)strtoul(start, &end, 10); if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || resolvedParaLevel>1) { errln("\nError on line %d: Resolved paragraph level incorrect at %s", 
(int)lineNumber, start); printErrorLine(); continue; } start=u_skipWhitespace(end); if(*start!=';') { errorCount++; errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line); return; } start++; if(!parseLevels(start)) { continue; } start=u_skipWhitespace(start); if(*start==';') { if(!parseOrdering(start+1)) { continue; } } else orderingCount=-1; ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(), paraLevel, NULL, errorCode); const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode); if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) { errln("Input line %d: %s", (int)lineNumber, line); continue; } UBiDiLevel actualLevel; if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) { printErrorLine(); errln("\nError on line %d: Wrong resolved paragraph level; expected %d actual %d", (int)lineNumber, resolvedParaLevel, actualLevel); continue; } if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) { continue; } if(orderingCount>=0 && !checkOrdering(ubidi.getAlias())) { continue; } } }
int main (int argc, char** argv) { if (argc != 2) { std::cerr << "Usage: " << argv[0] << " <num-iter>" << std::endl; return EXIT_FAILURE; } const unsigned NUM_ITER = atoi(argv[1]); // open first face in the font FT_Library ft_library = 0; FT_Error error = FT_Init_FreeType(&ft_library); if (error) throw std::runtime_error("Failed to initialize FreeType2 library"); FT_Face ft_face[NUM_EXAMPLES]; FT_New_Face(ft_library, "fonts/DejaVuSerif.ttf", 0, &ft_face[ENGLISH]); FT_New_Face(ft_library, "fonts/amiri-0.104/amiri-regular.ttf", 0, &ft_face[ARABIC]); FT_New_Face(ft_library, "fonts/fireflysung-1.3.0/fireflysung.ttf", 0, &ft_face[CHINESE]); // Get our harfbuzz font structs hb_font_t *hb_ft_font[NUM_EXAMPLES]; hb_ft_font[ENGLISH] = hb_ft_font_create(ft_face[ENGLISH], NULL); hb_ft_font[ARABIC] = hb_ft_font_create(ft_face[ARABIC] , NULL); hb_ft_font[CHINESE] = hb_ft_font_create(ft_face[CHINESE], NULL); { std::cerr << "Starting ICU shaping:" << std::endl; progress_timer timer1(std::clog,"ICU shaping done"); UErrorCode err = U_ZERO_ERROR; for (unsigned i = 0; i < NUM_ITER; ++i) { for (unsigned j = 0; j < NUM_EXAMPLES; ++j) { UnicodeString text = UnicodeString::fromUTF8(texts[j]); int32_t length = text.length(); UnicodeString reordered; UnicodeString shaped; UBiDi *bidi = ubidi_openSized(length, 0, &err); ubidi_setPara(bidi, text.getBuffer(), length, UBIDI_DEFAULT_LTR, 0, &err); ubidi_writeReordered(bidi, reordered.getBuffer(length), length, UBIDI_DO_MIRRORING, &err); ubidi_close(bidi); reordered.releaseBuffer(length); u_shapeArabic(reordered.getBuffer(), length, shaped.getBuffer(length), length, U_SHAPE_LETTERS_SHAPE | U_SHAPE_LENGTH_FIXED_SPACES_NEAR | U_SHAPE_TEXT_DIRECTION_VISUAL_LTR, &err); shaped.releaseBuffer(length); if (U_SUCCESS(err)) { U_NAMESPACE_QUALIFIER StringCharacterIterator iter(shaped); for (iter.setToStart(); iter.hasNext();) { UChar ch = iter.nextPostInc(); int32_t glyph_index = FT_Get_Char_Index(ft_face[j], ch); if (i == 0) { std::cerr << glyph_index << 
":"; } } if (i == 0) std::cerr << std::endl; } } } } { const char **shaper_list = hb_shape_list_shapers(); for ( ;*shaper_list; shaper_list++) { std::cerr << *shaper_list << std::endl; } std::cerr << "Starting Harfbuzz shaping" << std::endl; progress_timer timer2(std::clog,"Harfbuzz shaping done"); const char* const shapers[] = { /*"ot",*/"fallback" }; hb_buffer_t *buffer(hb_buffer_create()); for (unsigned i = 0; i < NUM_ITER; ++i) { for (unsigned j = 0; j < NUM_EXAMPLES; ++j) { UnicodeString text = UnicodeString::fromUTF8(texts[j]); int32_t length = text.length(); hb_buffer_clear_contents(buffer); //hb_buffer_set_unicode_funcs(buffer.get(), hb_icu_get_unicode_funcs()); hb_buffer_pre_allocate(buffer, length); hb_buffer_add_utf16(buffer, text.getBuffer(), text.length(), 0, length); hb_buffer_set_direction(buffer, text_directions[j]); hb_buffer_set_script(buffer, scripts[j]); hb_buffer_set_language(buffer,hb_language_from_string(languages[j], std::strlen(languages[j]))); //hb_shape(hb_ft_font[j], buffer.get(), 0, 0); hb_shape_full(hb_ft_font[j], buffer, 0, 0, shapers); unsigned num_glyphs = hb_buffer_get_length(buffer); hb_glyph_info_t *glyphs = hb_buffer_get_glyph_infos(buffer, NULL); //hb_glyph_position_t *positions = hb_buffer_get_glyph_positions(buffer.get(), NULL); for (unsigned k=0; k<num_glyphs; ++k) { int32_t glyph_index = glyphs[k].codepoint; if (i == 0) { std::cerr << glyph_index << ":"; } } if (i == 0) std::cerr << std::endl; } } hb_buffer_destroy(buffer); } // cleanup for (int j=0; j < NUM_EXAMPLES; ++j) { hb_font_destroy(hb_ft_font[j]); } FT_Done_FreeType(ft_library); return EXIT_SUCCESS; }
static void shape_text(text_line & line, text_itemizer & itemizer, std::map<unsigned,double> & width_map, face_manager_freetype & font_manager, double scale_factor ) { unsigned start = line.first_char(); unsigned end = line.last_char(); mapnik::value_unicode_string const& text = itemizer.text(); size_t length = end - start; if (!length) return; line.reserve(length); std::list<text_item> const& list = itemizer.itemize(start, end); UErrorCode err = U_ZERO_ERROR; mapnik::value_unicode_string shaped; mapnik::value_unicode_string reordered; for (auto const& text_item : list) { face_set_ptr face_set = font_manager.get_face_set(text_item.format->face_name, text_item.format->fontset); double size = text_item.format->text_size * scale_factor; face_set->set_unscaled_character_sizes(); for (auto const& face : *face_set) { UBiDi *bidi = ubidi_openSized(length, 0, &err); ubidi_setPara(bidi, text.getBuffer(), length, UBIDI_DEFAULT_LTR, 0, &err); ubidi_writeReordered(bidi, reordered.getBuffer(length), length, UBIDI_DO_MIRRORING, &err); ubidi_close(bidi); reordered.releaseBuffer(length); int32_t num_char = u_shapeArabic(reordered.getBuffer(), length, shaped.getBuffer(length), length, U_SHAPE_LETTERS_SHAPE | U_SHAPE_LENGTH_FIXED_SPACES_NEAR | U_SHAPE_TEXT_DIRECTION_VISUAL_LTR, &err); if (num_char < 0) { MAPNIK_LOG_ERROR(icu_shaper) << " u_shapeArabic returned negative num_char " << num_char; } std::size_t num_chars = static_cast<std::size_t>(num_char); shaped.releaseBuffer(length); bool shaped_status = true; if (U_SUCCESS(err) && (num_chars == length)) { U_NAMESPACE_QUALIFIER StringCharacterIterator iter(shaped); unsigned i = 0; for (iter.setToStart(); iter.hasNext();) { UChar ch = iter.nextPostInc(); glyph_info tmp; tmp.offset.clear(); tmp.char_index = i; tmp.glyph_index = FT_Get_Char_Index(face->get_face(), ch); if (tmp.glyph_index == 0) { shaped_status = false; break; } tmp.face = face; tmp.format = text_item.format; face->glyph_dimensions(tmp); tmp.scale_multiplier = size / 
face->get_face()->units_per_EM; width_map[i] += tmp.advance(); line.add_glyph(std::move(tmp), scale_factor); ++i; } } if (!shaped_status) continue; line.update_max_char_height(face->get_char_height(size)); return; } } }
std::vector<StyledText> BiDi::processStyledText(const StyledText& input, std::set<std::size_t> lineBreakPoints) { std::vector<StyledText> lines; const auto& inputText = input.first; const auto& styleIndices = input.second; UErrorCode errorCode = U_ZERO_ERROR; ubidi_setPara(impl->bidiText, mbgl::utf16char_cast<const UChar*>(inputText.c_str()), static_cast<int32_t>(inputText.size()), UBIDI_DEFAULT_LTR, nullptr, &errorCode); if (U_FAILURE(errorCode)) { throw std::runtime_error(std::string("BiDi::processStyledText: ") + u_errorName(errorCode)); } mergeParagraphLineBreaks(lineBreakPoints); std::size_t lineStartIndex = 0; for (std::size_t lineBreakPoint : lineBreakPoints) { StyledText line; line.second.reserve(lineBreakPoint - lineStartIndex); errorCode = U_ZERO_ERROR; ubidi_setLine(impl->bidiText, static_cast<int32_t>(lineStartIndex), static_cast<int32_t>(lineBreakPoint), impl->bidiLine, &errorCode); if (U_FAILURE(errorCode)) { throw std::runtime_error(std::string("BiDi::processStyledText (setLine): ") + u_errorName(errorCode)); } errorCode = U_ZERO_ERROR; uint32_t runCount = ubidi_countRuns(impl->bidiLine, &errorCode); if (U_FAILURE(errorCode)) { throw std::runtime_error(std::string("BiDi::processStyledText (countRuns): ") + u_errorName(errorCode)); } for (uint32_t runIndex = 0; runIndex < runCount; runIndex++) { int32_t runLogicalStart; int32_t runLength; UBiDiDirection direction = ubidi_getVisualRun(impl->bidiLine, runIndex, &runLogicalStart, &runLength); const bool isReversed = direction == UBIDI_RTL; std::size_t logicalStart = lineStartIndex + runLogicalStart; std::size_t logicalEnd = logicalStart + runLength; if (isReversed) { // Within this reversed section, iterate logically backwards // Each time we see a change in style, render a reversed chunk // of everything since the last change std::size_t styleRunStart = logicalEnd; uint8_t currentStyleIndex = styleIndices.at(styleRunStart - 1); for (std::size_t i = logicalEnd - 1; i >= logicalStart; i--) { if 
(currentStyleIndex != styleIndices.at(i) || i == logicalStart) { std::size_t styleRunEnd = i == logicalStart ? i : i + 1; std::u16string reversed = writeReverse(inputText, styleRunEnd, styleRunStart); line.first += reversed; for (std::size_t j = 0; j < reversed.size(); j++) { line.second.push_back(currentStyleIndex); } currentStyleIndex = styleIndices.at(i); styleRunStart = styleRunEnd; } if (i == 0) { break; } } } else { line.first += input.first.substr(logicalStart, runLength); line.second.insert(line.second.end(), styleIndices.begin() + logicalStart, styleIndices.begin() + logicalStart + runLength); } } lines.push_back(line); lineStartIndex = lineBreakPoint; } return lines; }