void calcLinebreaksUtf8( const char *str, size_t strLength, std::vector<uint8_t> *resultBreaks ) { resultBreaks->resize( strLength, 0 ); // our UnicodeBreaks enum lines up with the liblinebreak definitions set_linebreaks_utf8( (const uint8_t*)str, strLength, NULL, (char*)&(*resultBreaks)[0] ); }
// Processes one text entry: emits words through addWord() and pushes space
// elements onto myElements, then advances myOffset by the entry's byte length.
//
// The entry's bytes are looked at in two ways:
//   * decoded to UCS-4 (myUcs4String) so fribidi can fill myBidiLevels with one
//     level per character;
//   * as raw UTF-8 so set_linebreaks_utf8() can fill myBreaksTable with one
//     value per byte.
// The main loop then walks the UTF-8 bytes character by character, tracking
// whether it is currently inside a run of spaces, a run that started with a
// non-breaking space, or a word.
void ZLTextParagraphCursor::Builder::processTextEntry(const ZLTextEntry &textEntry) {
	const std::size_t dataLength = textEntry.dataLength();
	if (dataLength == 0) {
		return;
	}

	// Decode the entry into myUcs4String; len is the character count before the
	// extra 0 element is appended.
	myUcs4String.clear();
	ZLUnicodeUtil::utf8ToUcs4(myUcs4String, textEntry.data(), dataLength);
	int len = myUcs4String.size();
	myUcs4String.push_back(0);

	// One bidi level per character, plus one slot matching the appended 0.
	myBidiLevels.clear();
	myBidiLevels.assign(len + 1, 0);

	// Leading spaces keep the level remembered from the previous call.
	int firstNonSpace = 0;
	while ((firstNonSpace < len) && ZLUnicodeUtil::isSpace(myUcs4String[firstNonSpace])) {
		myBidiLevels[firstNonSpace++] = myLatestBidiLevel;
	}

	// Only run fribidi when at least one non-space character exists; it is given
	// the range [firstNonSpace, lastNonSpace] and writes into myBidiLevels
	// starting at firstNonSpace.
	int lastNonSpace = len - 1;
	if (lastNonSpace > firstNonSpace - 1) {
		while (ZLUnicodeUtil::isSpace(myUcs4String[lastNonSpace])) {
			--lastNonSpace;
		}
		fribidi_log2vis((FriBidiChar*)&myUcs4String[firstNonSpace], lastNonSpace - firstNonSpace + 1, &myBidiCharType, 0, 0, 0, &myBidiLevels[firstNonSpace]);
	}

	// Remember the level at lastNonSpace and copy it over every position from
	// lastNonSpace up to (not including) len, i.e. over any trailing spaces.
	myLatestBidiLevel = myBidiLevels[lastNonSpace];
	for (int i = lastNonSpace; i < len; ++i) {
		myBidiLevels[i] = myLatestBidiLevel;
	}

	// Per-byte line-break table for the raw UTF-8 data.
	myBreaksTable.clear();
	myBreaksTable.assign(dataLength, 0);
	const char *start = textEntry.data();
	const char *end = start + dataLength;
	set_linebreaks_utf8((const utf8_t*)start, dataLength, myLanguage.c_str(), &myBreaksTable[0]);

	// ch is the character decoded in this iteration, previousCh the one decoded
	// in the iteration before (assigned at the top of the loop before any use).
	ZLUnicodeUtil::Ucs4Char ch = 0, previousCh;
	enum { NO_SPACE, SPACE, NON_BREAKABLE_SPACE } spaceState = NO_SPACE;
	int charLength = 0;
	// index counts characters and is used to address myBidiLevels; ptr counts
	// bytes and is used to address myBreaksTable.
	int index = 0;
	const char *wordStart = start;
	updateBidiLevel(myBidiLevels[0]);
	for (const char *ptr = start; ptr < end; ptr += charLength, ++index) {
		previousCh = ch;
		charLength = ZLUnicodeUtil::firstChar(ch, ptr);
		if (ZLUnicodeUtil::isSpace(ch)) {
			// First space after a word: flush the pending word, if non-empty.
			if ((spaceState == NO_SPACE) && (ptr != wordStart)) {
				addWord(wordStart, myOffset + (wordStart - start), ptr - wordStart);
			}
			// A regular space always switches the run to SPACE, even if the run
			// began with a non-breaking space.
			spaceState = SPACE;
		} else if (ZLUnicodeUtil::isNBSpace(ch)) {
			// A non-breaking space only changes state when it follows a word; inside
			// an existing space run the current state is kept.
			if (spaceState == NO_SPACE) {
				if (ptr != wordStart) {
					addWord(wordStart, myOffset + (wordStart - start), ptr - wordStart);
				}
				spaceState = NON_BREAKABLE_SPACE;
			}
		} else {
			// Ordinary character: close whatever preceded it.
			switch (spaceState) {
				case SPACE:
					// The space run becomes a non-breaking element when the byte just
					// before this character is marked LINEBREAK_NOBREAK or previousCh is '-'.
					// NOTE(review): in this state previousCh is the preceding space
					// character, so the '-' comparison looks unreachable here - confirm.
					if ((myBreaksTable[ptr - start - 1] == LINEBREAK_NOBREAK) || (previousCh == '-')) {
						myElements.push_back(ZLTextElementPool::Pool.NBHSpaceElement);
					} else {
						myElements.push_back(ZLTextElementPool::Pool.HSpaceElement);
					}
					wordStart = ptr;
					break;
				case NON_BREAKABLE_SPACE:
					myElements.push_back(ZLTextElementPool::Pool.NBHSpaceElement);
					wordStart = ptr;
					break;
				case NO_SPACE:
					// Inside a word: split it here (never at the very first byte) when
					// either the preceding byte is not LINEBREAK_NOBREAK, the previous
					// character is not '-' and the pending word is non-empty, or the bidi
					// level changes between the previous and the current character.
					if ((ptr > start) && ((((myBreaksTable[ptr - start - 1] != LINEBREAK_NOBREAK) && (previousCh != '-')) && (ptr != wordStart)) || (myBidiLevels[index - 1] != myBidiLevels[index]))) {
						addWord(wordStart, myOffset + (wordStart - start), ptr - wordStart);
						wordStart = ptr;
					}
					break;
			}
			spaceState = NO_SPACE;
		}
		updateBidiLevel(myBidiLevels[index]);
	}

	// Flush whatever the entry ended with: a trailing space element or the last word.
	switch (spaceState) {
		case SPACE:
			myElements.push_back(ZLTextElementPool::Pool.HSpaceElement);
			break;
		case NON_BREAKABLE_SPACE:
			myElements.push_back(ZLTextElementPool::Pool.NBHSpaceElement);
			break;
		case NO_SPACE:
			addWord(wordStart, myOffset + (wordStart - start), end - wordStart);
			break;
	}
	myOffset += dataLength;
}
/* Stores one line's byte length, start offset and width into whichever of the
 * optional output arrays the caller supplied; NULL arrays are skipped. */
static void wordbreak_store_line(int line_from[], int line_len[], int line_width[],
                                 int index, int from, int length, GR_SIZE line_w)
{
    if (line_len) {
        line_len[index] = length;
    }
    if (line_from) {
        line_from[index] = from;
    }
    if (line_width) {
        line_width[index] = line_w;
    }
}

/*
 * func: compute how many lines the string `text` occupies when it is laid out
 *       with automatic line breaking inside the given width, and report where
 *       each line starts.
 * gc[IN]:        graphics context used to measure the text
 * width[IN]:     available width
 * lang[IN]:      locale/language code of the text. Currently not consulted: the
 *                language handed to set_linebreaks_utf8() comes from
 *                get_text_lang().
 * text[IN]:      the string
 * len[IN]:       length of the string in bytes
 * max_line[IN]:  maximum number of lines to report, i.e. the size of the output
 *                arrays; guards against overruns. Must not be less than 2.
 * line_from[IN,OUT]:  caller-allocated; receives, per line, the index in `text`
 *                     at which the line starts. NULL: not wanted.
 * line_len[IN,OUT]:   caller-allocated; receives, per line, the byte length of
 *                     the line. NULL: not wanted.
 * line_width[IN,OUT]: caller-allocated, same size as line_len; receives, per
 *                     line, the measured output width. NULL: not wanted.
 * return: number of lines, or -1 on error (break table allocation failed).
 *
 * Typical usage:
 * 1. Only count the lines:
 *      lines = utf8GetTextExWordBreakLines(gc, screenwidth, lang, pStr, strlen(pStr), 100, NULL, NULL, NULL);
 * 2. Count the lines and get each line's position and length:
 *      int *line_from = malloc(100 * sizeof(int));
 *      int *line_len  = malloc(100 * sizeof(int));
 *      lines = utf8GetTextExWordBreakLines(gc, screenwidth, lang, pStr, strlen(pStr), 100, line_from, line_len, NULL);
 *      for (i = 0; i < lines; i++)
 *          drawText(gc, pStr + line_from[i], line_len[i]);
 */
int utf8GetTextExWordBreakLines(GR_GC_ID gc, GR_SIZE width, const char* lang, const char *text, size_t len, int max_line,int line_from[],int line_len[],int line_width[])
{
    GR_SIZE w, h, b;
    GR_SIZE last_w = 0;   /* width measured at last_word; used to fall back when the line overflows */
    char *brks;           /* per-byte break information */
    const char *text_lang;
    const int text_len = (int)len; /* offsets and lengths are reported as int */
    int i;
    int line_start;       /* start of the current line, relative to text */
    int last_word;        /* last break position at which the line still fitted */
    int line_count = 0;   /* number of lines produced so far */
    int word_locate;

    (void)lang;

    if (len == 0) {
        return 0;
    }

    /* Fast path: the whole string fits on one line. */
    NxGetGCTextSize(gc, text, len, MWTF_UTF8, &w, &h, &b);
    if (w <= width) {
        wordbreak_store_line(line_from, line_len, line_width, 0, 0, text_len, w);
        return 1;
    }

    brks = app_malloc(len);
    if (brks == NULL) {
        return -1;
    }

    text_lang = get_text_lang(UTF8, text, len);
    /* Compute the breaking points */
    set_linebreaks_utf8(text, len, text_lang, brks);

    line_start = 0;
    last_word = 0;
    for (i = 1; i <= text_len; i++) {
        const char brk = brks[i - 1];

        /* Only positions where a break is required or allowed are of interest. */
        if (brk != LINEBREAK_MUSTBREAK && brk != LINEBREAK_ALLOWBREAK) {
            continue;
        }

        NxGetGCTextSize(gc, text + line_start, i - line_start, MWTF_UTF8, &w, &h, &b);
        if (w > width) {
            if (last_word > line_start) {
                /* The text after last_word alone is wider than a line: fill the
                 * rest of the current line with as much of it as fits. */
                if (w - last_w > width) {
                    int partial_len;
                    GR_SIZE partial_w;

                    partial_len = locate_word_to_fit(gc, width - last_w, text + last_word, (i - last_word), &partial_w);
                    last_word += partial_len;
                    last_w += partial_w;
                }
                wordbreak_store_line(line_from, line_len, line_width,
                                     line_count, line_start, last_word - line_start, last_w);
                line_count++;
                line_start = last_word;
                w = w - last_w;
                if (line_count >= max_line) {
                    goto done;
                }
            }
            /* too long word: keep cutting off line-sized pieces */
            while (w > width) {
                word_locate = locate_word_to_fit(gc, width, text + line_start, i - line_start, &last_w);
                wordbreak_store_line(line_from, line_len, line_width,
                                     line_count, line_start, word_locate, last_w);
                line_start += word_locate;
                line_count++;
                w = w - last_w;
                if (line_count >= max_line) {
                    goto done;
                }
            }
        }

        if (brk == LINEBREAK_MUSTBREAK || w == width) {
            /* Forced break, or the line is exactly full: close it here. */
            wordbreak_store_line(line_from, line_len, line_width,
                                 line_count, line_start, i - line_start, w);
            line_count++;
            line_start = i;
            last_word = line_start;
            last_w = 0;
            if (line_count >= max_line) {
                goto done;
            }
        } else {
            /* Still fits: remember this break as the fallback position. */
            last_word = i;
            last_w = w;
        }
    }

    /* Bytes left after the last break position form a final line. */
    if (line_start < text_len) {
        if (line_count >= max_line) {
            goto done;
        }
        /* w still holds an earlier measurement here, so measure the tail itself
         * to report its real width. */
        NxGetGCTextSize(gc, text + line_start, text_len - line_start, MWTF_UTF8, &w, &h, &b);
        wordbreak_store_line(line_from, line_len, line_width,
                             line_count, line_start, text_len - line_start, w);
        line_count++;
    }

done:
    app_free(brks);
    return line_count;
}
void lineBreakUtf8( const char *line, const std::function<bool(const char *, size_t)> &measureFn, std::function<void(const char *,size_t)> lineProcessFn ) { const size_t lengthInBytes = strlen( line ); shared_ptr<char> brks = shared_ptr<char>( (char*)malloc( lengthInBytes ), free ); set_linebreaks_utf8( (const uint8_t*)line, lengthInBytes, NULL, brks.get() ); // Byte-suffixed variables correspond to a byte in the UTF8 string, as opposed to the character // binary search for the threshold where measureFn() returns false; emerges as curChar size_t charLen = stringLengthUtf8( line, lengthInBytes ); size_t lineStartByte = 0, lineEndByte = 0; size_t lineStartChar = 0; while( lineStartChar < charLen ) { int minChar = 0, maxChar = charLen - lineStartChar /*[minChar,maxChar)*/, curChar = 0; // test to see if we're already on a mustbreak if( brks.get()[lineStartByte] != 0 ) { // update our maxChar to reflect any MUSTBREAKS int maxCharWithMustBreaks = minChar; size_t maxCharByte = lineStartByte; while( maxCharWithMustBreaks < maxChar ) { nextCharUtf8( line, &maxCharByte, lengthInBytes ); if( brks.get()[maxCharByte] == 0 ) { maxCharWithMustBreaks++; break; } else ++maxCharWithMustBreaks; } maxChar = maxCharWithMustBreaks + 1; while( minChar < maxChar ) { curChar = minChar + (maxChar-minChar+1)/2; size_t newByte = advanceCharUtf8( line + lineStartByte, curChar, lengthInBytes ); if( ! measureFn( line + lineStartByte, newByte ) ) maxChar = curChar - 1; else minChar = curChar; } curChar = minChar; } else { // we started at a mustbreak curChar = 1; } // find ideal place to perform the break, either at curChar or before depending on breaks size_t lineEndByteAfterBreaking = lineEndByte = advanceCharUtf8( line + lineStartByte, curChar ) + lineStartByte; if( ( lineEndByteAfterBreaking < lengthInBytes ) /*&& ( ! shouldBreak( brks.get()[lineEndByteAfterBreaking] ) )*/ ) { while( (lineEndByteAfterBreaking > lineStartByte) && ( ! 
shouldBreak( brks.get()[lineEndByteAfterBreaking-1] ) ) ) lineEndByteAfterBreaking--; if( lineEndByteAfterBreaking == lineStartByte ) // there's no good breakpoint; just break where we would have lineEndByteAfterBreaking = lineEndByte; } lineProcessFn( line + lineStartByte, lineEndByteAfterBreaking - lineStartByte ); lineStartChar += stringLengthUtf8( line + lineStartByte, lineEndByteAfterBreaking - lineStartByte ); lineStartByte = lineEndByteAfterBreaking; // eat any spaces we'd start on on the next line size_t tempByte = lineStartByte; while( nextCharUtf8( line, &tempByte, lengthInBytes ) == (uint32_t)' ') { lineStartByte = tempByte; ++lineStartChar; } } }