// Takes UTF16 input in logical order and applies Arabic shaping to the input while maintaining // logical order. Output won't be intelligible until the bidirectional algorithm is applied std::u16string applyArabicShaping(const std::u16string& input) { UErrorCode errorCode = U_ZERO_ERROR; const int32_t outputLength = u_shapeArabic(mbgl::utf16char_cast<const UChar*>(input.c_str()), static_cast<int32_t>(input.size()), nullptr, 0, (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) | (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK), &errorCode); // Pre-flighting will always set U_BUFFER_OVERFLOW_ERROR errorCode = U_ZERO_ERROR; std::u16string outputText(outputLength, 0); u_shapeArabic(mbgl::utf16char_cast<const UChar*>(input.c_str()), static_cast<int32_t>(input.size()), mbgl::utf16char_cast<UChar*>(&outputText[0]), outputLength, (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) | (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK), &errorCode); // If the algorithm fails for any reason, fall back to non-transformed text if (U_FAILURE(errorCode)) return input; return outputText; }
/**
 * Runs Arabic shaping over the transform's source text and records the
 * resulting length in the transform.
 *
 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure whose
 *                   <code>src</code>/<code>srcLength</code> are shaped into
 *                   <code>dest</code>/<code>destSize</code>.
 * @param options    Shaping options, forwarded unchanged to <code>u_shapeArabic</code>.
 * @param pErrorCode Pointer to the error code value, forwarded unchanged.
 */
static void
doShape(UBiDiTransform *pTransform, uint32_t options, UErrorCode *pErrorCode)
{
    const int32_t shapedLength = u_shapeArabic(pTransform->src,
                                               pTransform->srcLength,
                                               pTransform->dest,
                                               pTransform->destSize,
                                               options,
                                               pErrorCode);

    /* Publish the length through the caller-visible length slot. */
    *pTransform->pDestLength = shapedLength;
}
void TestArabicShapeThreads::doTailTest(void) { static const UChar src[] = { 0x0020, 0x0633, 0 }; static const UChar dst_old[] = { 0xFEB1, 0x200B,0 }; static const UChar dst_new[] = { 0xFEB1, 0xFE73,0 }; UChar dst[3] = { 0x0000, 0x0000,0 }; int32_t length; UErrorCode status; for (int32_t loopCount = 0; loopCount < 100; loopCount++) { status = U_ZERO_ERROR; length = u_shapeArabic(src, -1, dst, UPRV_LENGTHOF(dst), U_SHAPE_LETTERS_SHAPE|U_SHAPE_SEEN_TWOCELL_NEAR, &status); if(U_FAILURE(status)) { IntlTest::gTest->errln("Fail: status %s\n", u_errorName(status)); return; } else if(length!=2) { IntlTest::gTest->errln("Fail: len %d expected 3\n", length); return; } else if(u_strncmp(dst,dst_old,UPRV_LENGTHOF(dst))) { IntlTest::gTest->errln("Fail: got U+%04X U+%04X expected U+%04X U+%04X\n", dst[0],dst[1],dst_old[0],dst_old[1]); return; } //"Trying new tail status = U_ZERO_ERROR; length = u_shapeArabic(src, -1, dst, UPRV_LENGTHOF(dst), U_SHAPE_LETTERS_SHAPE|U_SHAPE_SEEN_TWOCELL_NEAR|U_SHAPE_TAIL_NEW_UNICODE, &status); if(U_FAILURE(status)) { IntlTest::gTest->errln("Fail: status %s\n", u_errorName(status)); return; } else if(length!=2) { IntlTest::gTest->errln("Fail: len %d expected 3\n", length); return; } else if(u_strncmp(dst,dst_new,UPRV_LENGTHOF(dst))) { IntlTest::gTest->errln("Fail: got U+%04X U+%04X expected U+%04X U+%04X\n", dst[0],dst[1],dst_new[0],dst_new[1]); return; } } return; }
// Converts logically ordered text to visual order and applies Arabic shaping.
//
// Steps: run the ICU BiDi algorithm (default RTL paragraph direction when the
// text has no strong characters), write the reordered text with mirroring and
// with BiDi control characters removed, then shape the result as visual-LTR
// text.
//
// Returns the converted string, or the unmodified input if any ICU call
// fails (the ICU error code is logged in that case).
OSMAND_CORE_API QString OSMAND_CORE_CALL OsmAnd::ICU::convertToVisualOrder(const QString& input)
{
    QString output;
    const auto len = input.length();
    UErrorCode icuError = U_ZERO_ERROR;
    bool ok = true;

    // Allocate ICU BiDi context
    const auto pContext = ubidi_openSized(len, 0, &icuError);
    if(pContext == nullptr || !U_SUCCESS(icuError))
    {
        LogPrintf(LogSeverityLevel::Error, "ICU error: %d", icuError);
        // Do not leak a context that was handed back together with an error.
        if(pContext != nullptr)
            ubidi_close(pContext);
        return input;
    }

    // Configure context to reorder from logical to visual
    ubidi_setReorderingMode(pContext, UBIDI_REORDER_DEFAULT);

    // Set data
    ubidi_setPara(pContext, reinterpret_cast<const UChar*>(input.unicode()), len, UBIDI_DEFAULT_RTL, nullptr, &icuError);
    ok = U_SUCCESS(icuError);

    if(ok)
    {
        QVector<UChar> reordered(len);
        // UBIDI_REMOVE_BIDI_CONTROLS can make the output shorter than the input,
        // so the length actually written must be used from here on. Using `len`
        // would shape (and return) the zero-filled tail of the buffer.
        const auto reorderedLen = ubidi_writeReordered(pContext, reordered.data(), len, UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &icuError);
        ok = U_SUCCESS(icuError);

        if(ok)
        {
            QVector<UChar> reshaped(reorderedLen);
            const auto newLen = u_shapeArabic(
                reordered.constData(), reorderedLen,
                reshaped.data(), reorderedLen,
                U_SHAPE_TEXT_DIRECTION_VISUAL_LTR | U_SHAPE_LETTERS_SHAPE | U_SHAPE_LENGTH_FIXED_SPACES_AT_END,
                &icuError);
            ok = U_SUCCESS(icuError);

            if(ok)
                output = QString(reinterpret_cast<const QChar*>(reshaped.constData()), newLen);
        }
    }

    // Release context
    ubidi_close(pContext);

    if(!ok)
    {
        LogPrintf(LogSeverityLevel::Error, "ICU error: %d", icuError);
        return input;
    }
    return output;
}
void font_face_set::get_string_info(string_info & info, UnicodeString const& ustr, char_properties *format) { double avg_height = character_dimensions('X').height(); UErrorCode err = U_ZERO_ERROR; UnicodeString reordered; UnicodeString shaped; int32_t length = ustr.length(); UBiDi *bidi = ubidi_openSized(length, 0, &err); ubidi_setPara(bidi, ustr.getBuffer(), length, UBIDI_DEFAULT_LTR, 0, &err); ubidi_writeReordered(bidi, reordered.getBuffer(length), length, UBIDI_DO_MIRRORING, &err); reordered.releaseBuffer(length); u_shapeArabic(reordered.getBuffer(), length, shaped.getBuffer(length), length, U_SHAPE_LETTERS_SHAPE | U_SHAPE_LENGTH_FIXED_SPACES_NEAR | U_SHAPE_TEXT_DIRECTION_VISUAL_LTR, &err); shaped.releaseBuffer(length); if (U_SUCCESS(err)) { StringCharacterIterator iter(shaped); for (iter.setToStart(); iter.hasNext();) { UChar ch = iter.nextPostInc(); char_info char_dim = character_dimensions(ch); char_dim.format = format; char_dim.avg_height = avg_height; info.add_info(char_dim); } } #if (U_ICU_VERSION_MAJOR_NUM*100 + U_ICU_VERSION_MINOR_NUM >= 406) if (ubidi_getBaseDirection(ustr.getBuffer(), length) == UBIDI_RTL) { info.set_rtl(true); } #endif ubidi_close(bidi); }
// Applies Arabic letter shaping (and European-to-Arabic digit conversion) to
// the UTF-8 contents of `text`, in place.
//
// Returns -1 without touching `text` when `key` is one of the small sentinel
// values used for en/deciphering, 0 otherwise.
char UTF8arShaping::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
    // hack, we're en(1)/de(0)ciphering
    if ((unsigned long)key < 2)
        return -1;

    // Both scratch buffers are sized from the UTF-8 length of the text.
    const int32_t capacity = text.length();
    UChar *utf16 = new UChar[capacity];
    UChar *shaped = new UChar[capacity];

    // Convert UTF-8 string to UTF-16 (UChars)
    int32_t units = ucnv_toUChars(conv, utf16, capacity, text.c_str(), -1, &err);

    units = u_shapeArabic(utf16, units,
                          shaped, units,
                          U_SHAPE_LETTERS_SHAPE | U_SHAPE_DIGITS_EN2AN,
                          &err);

    // Grow the buffer before converting back into it.
    text.setSize(text.size()*2);
    units = ucnv_fromUChars(conv, text.getRawData(), text.size(), shaped, units, &err);
    text.setSize(units);

    delete [] shaped;
    delete [] utf16;
    return 0;
}
void TextLayout::computeAdvancesWithICU(SkPaint* paint, const UChar* chars, size_t start, size_t count, size_t contextCount, int dirFlags, jfloat* outAdvances, jfloat* outTotalAdvance) { SkAutoSTMalloc<CHAR_BUFFER_SIZE, jchar> tempBuffer(contextCount); jchar* buffer = tempBuffer.get(); SkScalar* scalarArray = (SkScalar*)outAdvances; // this is where we'd call harfbuzz // for now we just use ushape.c size_t widths; const jchar* text; if (dirFlags & 0x1) { // rtl, call arabic shaping in case UErrorCode status = U_ZERO_ERROR; // Use fixed length since we need to keep start and count valid u_shapeArabic(chars, contextCount, buffer, contextCount, U_SHAPE_LENGTH_FIXED_SPACES_NEAR | U_SHAPE_TEXT_DIRECTION_LOGICAL | U_SHAPE_LETTERS_SHAPE | U_SHAPE_X_LAMALEF_SUB_ALTERNATE, &status); // we shouldn't fail unless there's an out of memory condition, // in which case we're hosed anyway for (int i = start, e = i + count; i < e; ++i) { if (buffer[i] == UNICODE_NOT_A_CHAR) { buffer[i] = UNICODE_ZWSP; // zero-width-space for skia } } text = buffer + start; widths = paint->getTextWidths(text, count << 1, scalarArray); } else { text = chars + start; widths = paint->getTextWidths(text, count << 1, scalarArray); } jfloat totalAdvance = 0; if (widths < count) { #if DEBUG_ADVANCES ALOGD("ICU -- count=%d", widths); #endif // Skia operates on code points, not code units, so surrogate pairs return only // one value. Expand the result so we have one value per UTF-16 code unit. // Note, skia's getTextWidth gets confused if it encounters a surrogate pair, // leaving the remaining widths zero. Not nice. 
for (size_t i = 0, p = 0; i < widths; ++i) { totalAdvance += outAdvances[p++] = SkScalarToFloat(scalarArray[i]); if (p < count && text[p] >= UNICODE_FIRST_LOW_SURROGATE && text[p] < UNICODE_FIRST_PRIVATE_USE && text[p-1] >= UNICODE_FIRST_HIGH_SURROGATE && text[p-1] < UNICODE_FIRST_LOW_SURROGATE) { outAdvances[p++] = 0; } #if DEBUG_ADVANCES ALOGD("icu-adv = %f - total = %f", outAdvances[i], totalAdvance); #endif } } else { #if DEBUG_ADVANCES ALOGD("ICU -- count=%d", count); #endif for (size_t i = 0; i < count; i++) { totalAdvance += outAdvances[i] = SkScalarToFloat(scalarArray[i]); #if DEBUG_ADVANCES ALOGD("icu-adv = %f - total = %f", outAdvances[i], totalAdvance); #endif } } *outTotalAdvance = totalAdvance; }
int main (int argc, char** argv) { if (argc != 2) { std::cerr << "Usage: " << argv[0] << " <num-iter>" << std::endl; return EXIT_FAILURE; } const unsigned NUM_ITER = atoi(argv[1]); // open first face in the font FT_Library ft_library = 0; FT_Error error = FT_Init_FreeType(&ft_library); if (error) throw std::runtime_error("Failed to initialize FreeType2 library"); FT_Face ft_face[NUM_EXAMPLES]; FT_New_Face(ft_library, "fonts/DejaVuSerif.ttf", 0, &ft_face[ENGLISH]); FT_New_Face(ft_library, "fonts/amiri-0.104/amiri-regular.ttf", 0, &ft_face[ARABIC]); FT_New_Face(ft_library, "fonts/fireflysung-1.3.0/fireflysung.ttf", 0, &ft_face[CHINESE]); // Get our harfbuzz font structs hb_font_t *hb_ft_font[NUM_EXAMPLES]; hb_ft_font[ENGLISH] = hb_ft_font_create(ft_face[ENGLISH], NULL); hb_ft_font[ARABIC] = hb_ft_font_create(ft_face[ARABIC] , NULL); hb_ft_font[CHINESE] = hb_ft_font_create(ft_face[CHINESE], NULL); { std::cerr << "Starting ICU shaping:" << std::endl; progress_timer timer1(std::clog,"ICU shaping done"); UErrorCode err = U_ZERO_ERROR; for (unsigned i = 0; i < NUM_ITER; ++i) { for (unsigned j = 0; j < NUM_EXAMPLES; ++j) { UnicodeString text = UnicodeString::fromUTF8(texts[j]); int32_t length = text.length(); UnicodeString reordered; UnicodeString shaped; UBiDi *bidi = ubidi_openSized(length, 0, &err); ubidi_setPara(bidi, text.getBuffer(), length, UBIDI_DEFAULT_LTR, 0, &err); ubidi_writeReordered(bidi, reordered.getBuffer(length), length, UBIDI_DO_MIRRORING, &err); ubidi_close(bidi); reordered.releaseBuffer(length); u_shapeArabic(reordered.getBuffer(), length, shaped.getBuffer(length), length, U_SHAPE_LETTERS_SHAPE | U_SHAPE_LENGTH_FIXED_SPACES_NEAR | U_SHAPE_TEXT_DIRECTION_VISUAL_LTR, &err); shaped.releaseBuffer(length); if (U_SUCCESS(err)) { U_NAMESPACE_QUALIFIER StringCharacterIterator iter(shaped); for (iter.setToStart(); iter.hasNext();) { UChar ch = iter.nextPostInc(); int32_t glyph_index = FT_Get_Char_Index(ft_face[j], ch); if (i == 0) { std::cerr << glyph_index << 
":"; } } if (i == 0) std::cerr << std::endl; } } } } { const char **shaper_list = hb_shape_list_shapers(); for ( ;*shaper_list; shaper_list++) { std::cerr << *shaper_list << std::endl; } std::cerr << "Starting Harfbuzz shaping" << std::endl; progress_timer timer2(std::clog,"Harfbuzz shaping done"); const char* const shapers[] = { /*"ot",*/"fallback" }; hb_buffer_t *buffer(hb_buffer_create()); for (unsigned i = 0; i < NUM_ITER; ++i) { for (unsigned j = 0; j < NUM_EXAMPLES; ++j) { UnicodeString text = UnicodeString::fromUTF8(texts[j]); int32_t length = text.length(); hb_buffer_clear_contents(buffer); //hb_buffer_set_unicode_funcs(buffer.get(), hb_icu_get_unicode_funcs()); hb_buffer_pre_allocate(buffer, length); hb_buffer_add_utf16(buffer, text.getBuffer(), text.length(), 0, length); hb_buffer_set_direction(buffer, text_directions[j]); hb_buffer_set_script(buffer, scripts[j]); hb_buffer_set_language(buffer,hb_language_from_string(languages[j], std::strlen(languages[j]))); //hb_shape(hb_ft_font[j], buffer.get(), 0, 0); hb_shape_full(hb_ft_font[j], buffer, 0, 0, shapers); unsigned num_glyphs = hb_buffer_get_length(buffer); hb_glyph_info_t *glyphs = hb_buffer_get_glyph_infos(buffer, NULL); //hb_glyph_position_t *positions = hb_buffer_get_glyph_positions(buffer.get(), NULL); for (unsigned k=0; k<num_glyphs; ++k) { int32_t glyph_index = glyphs[k].codepoint; if (i == 0) { std::cerr << glyph_index << ":"; } } if (i == 0) std::cerr << std::endl; } } hb_buffer_destroy(buffer); } // cleanup for (int j=0; j < NUM_EXAMPLES; ++j) { hb_font_destroy(hb_ft_font[j]); } FT_Done_FreeType(ft_library); return EXIT_SUCCESS; }
static void shape_text(text_line & line, text_itemizer & itemizer, std::map<unsigned,double> & width_map, face_manager_freetype & font_manager, double scale_factor ) { unsigned start = line.first_char(); unsigned end = line.last_char(); mapnik::value_unicode_string const& text = itemizer.text(); size_t length = end - start; if (!length) return; line.reserve(length); std::list<text_item> const& list = itemizer.itemize(start, end); UErrorCode err = U_ZERO_ERROR; mapnik::value_unicode_string shaped; mapnik::value_unicode_string reordered; for (auto const& text_item : list) { face_set_ptr face_set = font_manager.get_face_set(text_item.format->face_name, text_item.format->fontset); double size = text_item.format->text_size * scale_factor; face_set->set_unscaled_character_sizes(); for (auto const& face : *face_set) { UBiDi *bidi = ubidi_openSized(length, 0, &err); ubidi_setPara(bidi, text.getBuffer(), length, UBIDI_DEFAULT_LTR, 0, &err); ubidi_writeReordered(bidi, reordered.getBuffer(length), length, UBIDI_DO_MIRRORING, &err); ubidi_close(bidi); reordered.releaseBuffer(length); int32_t num_char = u_shapeArabic(reordered.getBuffer(), length, shaped.getBuffer(length), length, U_SHAPE_LETTERS_SHAPE | U_SHAPE_LENGTH_FIXED_SPACES_NEAR | U_SHAPE_TEXT_DIRECTION_VISUAL_LTR, &err); if (num_char < 0) { MAPNIK_LOG_ERROR(icu_shaper) << " u_shapeArabic returned negative num_char " << num_char; } std::size_t num_chars = static_cast<std::size_t>(num_char); shaped.releaseBuffer(length); bool shaped_status = true; if (U_SUCCESS(err) && (num_chars == length)) { U_NAMESPACE_QUALIFIER StringCharacterIterator iter(shaped); unsigned i = 0; for (iter.setToStart(); iter.hasNext();) { UChar ch = iter.nextPostInc(); glyph_info tmp; tmp.offset.clear(); tmp.char_index = i; tmp.glyph_index = FT_Get_Char_Index(face->get_face(), ch); if (tmp.glyph_index == 0) { shaped_status = false; break; } tmp.face = face; tmp.format = text_item.format; face->glyph_dimensions(tmp); tmp.scale_multiplier = size / 
face->get_face()->units_per_EM; width_map[i] += tmp.advance(); line.add_glyph(std::move(tmp), scale_factor); ++i; } } if (!shaped_status) continue; line.update_max_char_height(face->get_char_height(size)); return; } } }
// Measures the string held by `info`, one UTF-16 code unit at a time, in
// visual order.
//
// The string is split into BiDi visual runs. LTR runs are measured as they
// are. RTL runs are reversed into visual order; if the first code unit of the
// reversed run lies in U+0600..U+06FF the run is additionally Arabic-shaped,
// and it contributes nothing when shaping reports a failure. Every measured
// code unit is added to `info`; finally the accumulated width and the maximum
// height are stored with set_dimensions(). If the BiDi setup fails the
// dimensions are set from whatever was accumulated (zero).
void font_face_set::get_string_info(string_info & info)
{
    unsigned width = 0;
    unsigned height = 0;
    UErrorCode err = U_ZERO_ERROR;
    UnicodeString const& ustr = info.get_string();
    const UChar * text = ustr.getBuffer();
    UBiDi * bidi = ubidi_openSized(ustr.length(),0,&err);

    if (!U_SUCCESS(err))
    {
        // No BiDi object to work with (and none to close).
        info.set_dimensions(width, height);
        return;
    }

    ubidi_setPara(bidi,text,ustr.length(), UBIDI_DEFAULT_LTR,0,&err);
    if (U_SUCCESS(err))
    {
        int32_t run_count = ubidi_countRuns(bidi,&err);
        int32_t logicalStart;
        int32_t length;

        for (int32_t run = 0; run < run_count; ++run)
        {
            if (UBIDI_LTR == ubidi_getVisualRun(bidi,run,&logicalStart,&length))
            {
                // Left-to-right run: logical order is already visual order.
                do
                {
                    UChar ch = text[logicalStart++];
                    dimension_t char_dim = character_dimensions(ch);
                    info.add_info(ch, char_dim.width, char_dim.height);
                    width += char_dim.width;
                    if (char_dim.height > height) height = char_dim.height;
                } while (--length > 0);
                continue;
            }

            // Right-to-left run: copy it back-to-front to obtain visual order.
            logicalStart += length;
            int32_t out_pos = 0;
            int32_t remaining = length;
            UnicodeString arabic;
            UChar * buf = arabic.getBuffer(length);
            do
            {
                buf[out_pos++] = text[--logicalStart];
            } while (--remaining > 0);
            arabic.releaseBuffer(length);

            // Only the first code unit decides whether the run is treated as Arabic.
            UChar const first = *arabic.getBuffer();
            UnicodeString shaped;
            UnicodeString const* visual = &arabic; // Non-Arabic RTL is measured unshaped
            if (first >= 0x0600 && first <= 0x06ff)
            {
                u_shapeArabic(arabic.getBuffer(),arabic.length(),
                              shaped.getBuffer(arabic.length()),arabic.length(),
                              U_SHAPE_LETTERS_SHAPE|U_SHAPE_LENGTH_FIXED_SPACES_NEAR|
                              U_SHAPE_TEXT_DIRECTION_VISUAL_LTR, &err);
                shaped.releaseBuffer(arabic.length());
                if (!U_SUCCESS(err))
                {
                    continue; // shaping failed: this run is not measured
                }
                visual = &shaped;
            }

            for (int k = 0; k < visual->length(); ++k)
            {
                dimension_t char_dim = character_dimensions((*visual)[k]);
                info.add_info((*visual)[k], char_dim.width, char_dim.height);
                width += char_dim.width;
                if (char_dim.height > height) height = char_dim.height;
            }
        }
    }
    ubidi_close(bidi);

    info.set_dimensions(width, height);
}