static bool MCConvertNativeFromUTF16(const uint16_t *p_chars, uint32_t p_char_count, uint8_t*& r_output, uint32_t& r_output_length) { uint8_t *t_output; uint32_t t_output_length; if (!MCMemoryAllocate(p_char_count, t_output)) return false; uint32_t t_index; t_index = 0; t_output_length = 0; while(t_index < p_char_count) { if (p_chars[t_index] < 128 && (t_index == p_char_count - 1 || p_chars[t_index + 1] < 128)) { t_output[t_output_length++] = (char)p_chars[t_index]; t_index += 1; } else { uint32_t t_start; t_start = t_index; uint32_t t_codepoint; t_codepoint = MCUnicodeCodepointAdvance((const uint2 *)p_chars, p_char_count, t_index); while(t_index < p_char_count) { uint4 t_old_index; t_old_index = t_index; t_codepoint = MCUnicodeCodepointAdvance((const uint2 *)p_chars, p_char_count, t_index); if (MCUnicodeCodepointIsBase(t_codepoint)) { t_index = t_old_index; break; } } uint8_t t_char; if (!MCUnicodeMapToNative(p_chars + t_start, t_index - t_start, t_char)) t_char = '?'; t_output[t_output_length++] = t_char; } } MCMemoryReallocate(t_output, t_output_length, t_output); r_output = t_output; r_output_length = t_output_length; return true; }
void MCFontBreakText(MCFontRef p_font, MCStringRef p_text, MCRange p_range, MCFontBreakTextCallback p_callback, void *p_callback_data, bool p_rtl) { // MW-2013-12-19: [[ Bug 11559 ]] If the font has a nil font, do nothing. if (p_font -> fontstruct == nil) return; // If the text is small enough, don't bother trying to break it /*if (p_length <= (kMCFontBreakTextCharLimit * (p_is_unicode ? 2 : 1))) { p_callback(p_font, p_text, p_length, p_is_unicode, p_callback_data); return; }*/ //p_callback(p_font, p_text, p_length, p_is_unicode, p_callback_data); //return; // Scan forward in the string for possible break locations. Breaks are // assigned a quality value as some locations are better for breaking than // others. The qualities are: // // 0. no break found // 1. grapheme break found // 2. URL break found ('/' char) // 3. word break found // // This isn't a particularly good algorithm but should suffice until full // Unicode support is added and a proper breaking algorithm implemented. uint32_t t_stride; t_stride = kMCFontBreakTextCharLimit; uindex_t t_end = p_range.offset + p_range.length; uindex_t t_length = p_range.length; uindex_t t_offset = (p_rtl) ? 0 : p_range.offset; while (t_length > 0) { int t_break_quality; uindex_t t_break_point, t_index; t_break_quality = 0; t_break_point = 0; t_index = 0; // Find the best break within the next stride characters. If there are // no breaking points, extend beyond the stride until one is found. while ((t_index < t_stride || t_break_quality == 0) && t_index < t_length) { codepoint_t t_char; uindex_t t_advance; if (p_rtl) t_char = MCStringGetCharAtIndex(p_text, t_end - t_index - t_offset); else t_char = MCStringGetCharAtIndex(p_text, t_offset + t_index); if (MCUnicodeCodepointIsHighSurrogate(t_char)) { // Surrogate pair if (p_rtl) t_char = MCUnicodeSurrogatesToCodepoint(t_char, MCStringGetCharAtIndex(p_text, t_end - t_index - t_offset - 1)); else t_char = MCUnicodeSurrogatesToCodepoint(t_char, MCStringGetCharAtIndex(p_text, t_offset + t_index + 1)); t_advance = 2; } else { t_advance = 1; } // Prohibit breaks at the beginning of the string if (t_index == 0) { t_index += t_advance; continue; } if (t_char == ' ') { t_break_point = t_index; t_break_quality = 3; } else if (t_break_quality < 3 && t_char == '/') { t_break_point = t_index; t_break_quality = 2; } else if (t_break_quality < 2 && MCUnicodeCodepointIsBase(t_char)) { t_break_point = t_index; t_break_quality = 1; } else if (t_break_quality < 2 && t_char > 0xFFFF) { // Character outside BMP, assume can break here t_break_point = t_index; t_break_quality = 1; } // If the break point is a word boundary, don't look for a later // breaking point. Words are cached as-is where possible. if (t_break_quality == 3) break; // Advance to the next character t_index += t_advance; } // If no word break was found and the whole of the remaining text was // searched and the remaining text is smaller than the break size then // don't attempt a break just for the sake of it. if (t_break_quality < 3 && t_length < kMCFontBreakTextCharLimit) t_break_point = t_length; // If no break point was found, just process the whole line if (t_break_quality == 0) t_break_point = t_length; // Process this chunk of text MCRange t_range; if (p_rtl) t_range = MCRangeMake(t_end - t_offset - t_break_point, t_break_point); else t_range = MCRangeMake(t_offset, t_break_point); #if !defined(_WIN32) && !defined(_ANDROID_MOBILE) // This is a really ugly hack to get LTR/RTL overrides working correctly - // ATSUI and Pango think they know better than us and won't let us suppress // the BiDi algorithm they uses for text layout. So instead, we need to add // an LRO or RLO character in front of every single bit of text :-( MCAutoStringRef t_temp; unichar_t t_override; if (p_rtl) t_override = 0x202E; else t_override = 0x202D; /* UNCHECKED */ MCStringCreateMutable(0, &t_temp); /* UNCHECKED */ MCStringAppendChar(*t_temp, t_override); /* UNCHECKED */ MCStringAppendSubstring(*t_temp, p_text, t_range); /* UNCHECKED */ MCStringAppendChar(*t_temp, 0x202C); p_callback(p_font, *t_temp, MCRangeMake(0, MCStringGetLength(*t_temp)), p_callback_data); #else // Another ugly hack - this time, to avoid incoming strings being coerced // into Unicode strings needlessly (because the drawing code uses unichars). // Do a mutable copy (to ensure an actual copy) before drawing. MCAutoStringRef t_temp; /* UNCHECKED */ MCStringMutableCopySubstring(p_text, t_range, &t_temp); p_callback(p_font, *t_temp, MCRangeMake(0, t_range.length), p_callback_data); #endif // Explicitly show breaking points //p_callback(p_font, MCSTR("|"), MCRangeMake(0, 1), p_callback_data); // Move on to the next chunk t_offset += t_break_point; // SN-2014-07-23: [[ Bug 12910 ]] Script editor crashes // Make sure we get 0 as a minimum, not a negative value since t_length is a uindex_t. if (t_length < t_break_point) t_length = 0; else t_length -= t_break_point; } }
void MCFontBreakText(MCFontRef p_font, const char *p_text, uint32_t p_length, bool p_is_unicode, MCFontBreakTextCallback p_callback, void *p_callback_data) { // MW-2013-12-19: [[ Bug 11559 ]] If the font has a nil font, do nothing. if (p_font -> fontstruct == nil) return; // If the text is small enough, don't bother trying to break it /*if (p_length <= (kMCFontBreakTextCharLimit * (p_is_unicode ? 2 : 1))) { p_callback(p_font, p_text, p_length, p_is_unicode, p_callback_data); return; }*/ //p_callback(p_font, p_text, p_length, p_is_unicode, p_callback_data); //return; // Scan forward in the string for possible break locations. Breaks are // assigned a quality value as some locations are better for breaking than // others. The qualities are: // // 0. no break found // 1. grapheme break found // 2. URL break found ('/' char) // 3. word break found // // This isn't a particularly good algorithm but should suffice until full // Unicode support is added and a proper breaking algorithm implemented. uint32_t t_stride; t_stride = kMCFontBreakTextCharLimit * (p_is_unicode ? 2 : 1); while (p_length > 0) { int t_break_quality; uint32_t t_break_point, t_index; t_break_quality = 0; t_break_point = 0; t_index = 0; uint32_t t_length; t_length = p_length / (p_is_unicode ? 2 : 1); // Find the best break within the next stride characters. If there are // no breaking points, extend beyond the stride until one is found. while ((t_index < t_stride || t_break_quality == 0) && t_index < t_length) { uint32_t t_char; uint32_t t_advance; if (p_is_unicode) { t_char = ((unichar_t*)p_text)[t_index]; if (0xD800 <= t_char && t_char < 0xDC00) { // Surrogate pair t_char = ((t_char - 0xD800) << 10) + (((unichar_t*)p_text)[t_index+1] - 0xDC00); t_advance = 2; } else { t_advance = 1; } } else { t_char = p_text[t_index]; t_advance = 1; } // Prohibit breaks at the beginning of the string if (t_index == 0) { t_index += t_advance; continue; } if (t_char == ' ') { t_break_point = t_index; t_break_quality = 3; } else if (t_break_quality < 3 && t_char == '/') { t_break_point = t_index; t_break_quality = 2; } else if (t_break_quality < 2 && MCUnicodeCodepointIsBase(t_char)) { t_break_point = t_index; t_break_quality = 1; } else if (t_break_quality < 2 && 0xDC00 <= t_char && t_char <= 0xDFFF) { // Trailing character of surrogate pair t_break_point = t_index; t_break_quality = 1; } // If the break point is a word boundary, don't look for a later // breaking point. Words are cached as-is where possible. if (t_break_quality == 3) break; // Advance to the next character t_index += t_advance; } // If no word break was found and the whole of the remaining text was // searched and the remaining text is smaller than the break size then // don't attempt a break just for the sake of it. if (t_break_quality < 3 && t_length < kMCFontBreakTextCharLimit) t_break_point = t_length; // If no break point was found, just process the whole line if (t_break_quality == 0) t_break_point = t_length; // Process this chunk of text uint32_t t_byte_len; t_byte_len = t_break_point * (p_is_unicode ? 2 : 1); p_callback(p_font, p_text, t_byte_len, p_is_unicode, p_callback_data); // Explicitly show breaking points //p_callback(p_font, "|", 1, false, p_callback_data); // Move on to the next chunk p_text += t_byte_len; p_length -= t_byte_len; } }