// Points the (lazily created) break iterator at |string| and recomputes the
// Pango log attributes for it.
//
// createdIterator / iterator: caller-owned state; the iterator is allocated
//     the first time this function runs and reused afterwards.
// type: stored in the iterator's m_type.
// string / length: the text handed to the iterator's m_charIterator.
//
// Returns the iterator, or 0 when |string| is null, when no iterator exists,
// or when m_charIterator.setText() returns a false value.
static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator, UBreakIteratorType type, const UChar* string, int length)
{
    if (!string)
        return 0;

    if (!createdIterator) {
        iterator = new TextBreakIterator();
        createdIterator = true;
    }

    // Short-circuit keeps the original ordering: the null check comes first,
    // then the text is installed.
    if (!iterator || !iterator->m_charIterator.setText(string, length))
        return 0;

    const int characterCount = iterator->m_charIterator.getLength();
    // One attribute per character plus one for the position after the text.
    const int attributeCount = characterCount + 1;

    iterator->m_type = type;

    // Drop the attributes computed for the previous text before allocating
    // a fresh, zero-filled array.
    if (createdIterator)
        g_free(iterator->m_logAttrs);
    iterator->m_logAttrs = g_new0(PangoLogAttr, attributeCount);

    pango_get_log_attrs(iterator->m_charIterator.getText(),
                        iterator->m_charIterator.getSize(),
                        -1, 0, iterator->m_logAttrs, attributeCount);

    return iterator;
}
/*
 * Ruby binding around pango_get_log_attrs().
 *
 * text:     Ruby string to analyse; its byte length is taken from
 *           RSTRING_LEN.
 * level:    converted with NUM2INT and passed as the embedding level.
 * language: converted with RVAL2BOXED(PANGO_TYPE_LANGUAGE).
 *
 * Returns a Ruby array with one boxed PangoLogAttr per character of the text
 * plus one for the position after the last character.
 */
static VALUE
rpango_get_log_attrs(VALUE self, VALUE text, VALUE level, VALUE language)
{
    const gchar *gtext;
    gint len;
    gint clevel;
    glong i, attrs_len;
    PangoLanguage *clanguage;
    PangoLogAttr *attrs;
    VALUE ret;

    gtext = StringValuePtr(text);
    len = RSTRING_LEN(text);

    /*
     * Convert every Ruby argument BEFORE allocating the attribute buffer:
     * the conversions can raise a Ruby exception, which would otherwise
     * unwind past g_free() and leak the buffer.
     */
    clevel = NUM2INT(level);
    clanguage = (PangoLanguage *)RVAL2BOXED(language, PANGO_TYPE_LANGUAGE);

    attrs_len = g_utf8_strlen(gtext, (gssize)len) + 1l;
    attrs = g_new0(PangoLogAttr, attrs_len);
    pango_get_log_attrs(gtext, len, clevel, clanguage, attrs, attrs_len);

    ret = rb_ary_new();
    /* The index has the same type as attrs_len to avoid a mixed-width compare. */
    for (i = 0; i < attrs_len; i++) {
        rb_ary_push(ret, BOXED2RVAL(&attrs[i], PANGO_TYPE_LOG_ATTR));
    }
    g_free(attrs);

    return ret;
}
/*
 * Computes the Pango log attributes for TEXT (language "C") and runs the
 * line, sentence, grapheme and word invariant checkers over them, in that
 * order. Calls fail() if TEXT is not valid UTF-8.
 */
static void
check_invariants (const char *text)
{
  PangoLogAttr *log_attrs;
  int n_chars;
  int n_attrs;

  if (!g_utf8_validate (text, -1, NULL))
    fail ("Invalid UTF-8 in test text");

  n_chars = g_utf8_strlen (text, -1);
  /* One entry per character plus one for the end-of-text position. */
  n_attrs = n_chars + 1;
  log_attrs = g_new0 (PangoLogAttr, n_attrs);

  pango_get_log_attrs (text, -1, 0,
                       pango_language_from_string ("C"),
                       log_attrs, n_attrs);

  check_line_invariants (text, log_attrs);
  check_sentence_invariants (text, log_attrs);
  check_grapheme_invariants (text, log_attrs);
  check_word_invariants (text, log_attrs);

#if 0
  print_sentences (text, log_attrs);
#endif

  g_free (log_attrs);
}
// Points the (lazily created) break iterator at |string| and recomputes the
// Pango log attributes for it.
//
// createdIterator / iterator: caller-owned state; the iterator is allocated
//     the first time this function runs and reused afterwards.
// type: stored in the iterator's m_type.
// string / length: UTF-16 text; it is converted to UTF-8 for Pango.
//
// Returns the iterator, or 0 when |string| is null, when no iterator exists,
// or when the UTF-16 -> UTF-8 conversion fails. On failure the iterator's
// previous state is left untouched.
static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator, UBreakIteratorType type, const UChar* string, int length)
{
    if (!string)
        return 0;

    if (!createdIterator) {
        iterator = new TextBreakIterator();
        createdIterator = true;
    }
    if (!iterator)
        return 0;

    // g_utf16_to_utf8() returns NULL on invalid input and only fills in the
    // written-length out-parameter on success, so initialise it and bail out
    // instead of handing a null pointer and a garbage length to Pango.
    long utf8len = 0;
    GOwnPtr<char> utf8;
    utf8.set(g_utf16_to_utf8(string, length, 0, &utf8len, 0));
    if (!utf8.get())
        return 0;

    // FIXME: assumes no surrogate pairs. The attribute array is sized by the
    // UTF-16 length, whereas Pango produces one entry per character.

    iterator->m_type = type;
    iterator->m_length = length;

    // Drop the attributes computed for the previous text before allocating
    // a fresh, zero-filled array.
    if (createdIterator)
        g_free(iterator->m_logAttrs);
    iterator->m_logAttrs = g_new0(PangoLogAttr, length + 1);
    iterator->m_index = -1;

    pango_get_log_attrs(utf8.get(), utf8len, -1, 0, iterator->m_logAttrs, length + 1);

    return iterator;
}
void EditorClient::checkSpellingOfString(const UChar* text, int length, int* misspellingLocation, int* misspellingLength) { GSList* dicts = webkit_web_settings_get_enchant_dicts(m_webView); if (!dicts) return; gchar* ctext = g_utf16_to_utf8(const_cast<gunichar2*>(text), length, 0, 0, 0); int utflen = g_utf8_strlen(ctext, -1); PangoLanguage* language = pango_language_get_default(); PangoLogAttr* attrs = g_new(PangoLogAttr, utflen+1); // pango_get_log_attrs uses an aditional position at the end of the text. pango_get_log_attrs(ctext, -1, -1, language, attrs, utflen+1); for (int i = 0; i < length+1; i++) { // We go through each character until we find an is_word_start, // then we get into an inner loop to find the is_word_end corresponding // to it. if (attrs[i].is_word_start) { int start = i; int end = i; int wordLength; while (attrs[end].is_word_end < 1) end++; wordLength = end - start; // Set the iterator to be at the current word end, so we don't // check characters twice. i = end; for (; dicts; dicts = dicts->next) { EnchantDict* dict = static_cast<EnchantDict*>(dicts->data); gchar* cstart = g_utf8_offset_to_pointer(ctext, start); gint bytes = static_cast<gint>(g_utf8_offset_to_pointer(ctext, end) - cstart); gchar* word = g_new0(gchar, bytes+1); int result; g_utf8_strncpy(word, cstart, end - start); result = enchant_dict_check(dict, word, -1); g_free(word); if (result) { *misspellingLocation = start; *misspellingLength = wordLength; } else { // Stop checking, this word is ok in at least one dict. *misspellingLocation = -1; *misspellingLength = 0; break; } } } } g_free(attrs); g_free(ctext); }
/*
 * Computes statistics for the text between START and END of DOC.
 *
 * chars:       out, number of UTF-8 characters.
 * words:       out, number of positions Pango marks as a word start.
 * white_chars: out, number of characters Pango marks as white space.
 * bytes:       out, byte length of the text.
 *
 * All four outputs are overwritten; previous values are not accumulated.
 */
static void
calculate_info (CeditDocument *doc, GtkTextIter *start, GtkTextIter *end, gint *chars, gint *words, gint *white_chars, gint *bytes)
{
	gchar *text;

	cedit_debug (DEBUG_PLUGINS);

	text = gtk_text_buffer_get_slice (GTK_TEXT_BUFFER (doc), start, end, TRUE);

	*chars = g_utf8_strlen (text, -1);
	*bytes = strlen (text);

	/*
	 * Reset the counters unconditionally. Previously they were only reset
	 * for empty text and otherwise incremented on top of whatever the
	 * caller passed in.
	 */
	*white_chars = 0;
	*words = 0;

	if (*chars > 0)
	{
		PangoLogAttr *attrs;
		gint i;

		/* One entry per character plus one for the end-of-text position. */
		attrs = g_new0 (PangoLogAttr, *chars + 1);

		pango_get_log_attrs (text, -1, 0, pango_language_from_string ("C"), attrs, *chars + 1);

		for (i = 0; i < (*chars); i++)
		{
			if (attrs[i].is_white)
				++(*white_chars);

			if (attrs[i].is_word_start)
				++(*words);
		}

		g_free (attrs);
	}

	g_free (text);
}
/*
 * Ruby binding around pango_get_log_attrs().
 *
 * rbtext:     Ruby string to analyse (byte length from RSTRING_LEN).
 * rblevel:    embedding level, converted with NUM2INT.
 * rblanguage: converted with RVAL2PANGOLANGUAGE.
 *
 * Returns the result of PANGOLOGATTRS2RVAL_FREE on the computed attributes.
 */
static VALUE
rg_s_get_log_attrs(G_GNUC_UNUSED VALUE self, VALUE rbtext, VALUE rblevel, VALUE rblanguage)
{
    const gchar *utf8;
    long byte_length;
    int embedding_level;
    PangoLanguage *lang;
    long attr_count;
    PangoLogAttr *log_attrs;
    VALUE result;

    /* Convert all Ruby arguments first, in the original order. */
    utf8 = RVAL2CSTR(rbtext);
    byte_length = RSTRING_LEN(rbtext);
    embedding_level = NUM2INT(rblevel);
    lang = RVAL2PANGOLANGUAGE(rblanguage);

    /* One entry per character plus one for the end-of-text position. */
    attr_count = g_utf8_strlen(utf8, byte_length) + 1;
    log_attrs = g_new(PangoLogAttr, attr_count);

    pango_get_log_attrs(utf8, byte_length, embedding_level, lang,
                        log_attrs, attr_count);

    result = PANGOLOGATTRS2RVAL_FREE(log_attrs, attr_count);
    return result;
}
void NS_GetComplexLineBreaks(const PRUnichar* aText, PRUint32 aLength, PRUint8* aBreakBefore) { NS_ASSERTION(aText, "aText shouldn't be null"); memset(aBreakBefore, PR_FALSE, aLength * sizeof(PRUint8)); nsAutoTArray<PangoLogAttr, 2000> attrBuffer; if (!attrBuffer.AppendElements(aLength + 1)) return; NS_ConvertUTF16toUTF8 aUTF8(aText, aLength); const gchar* p = aUTF8.Data(); const gchar* end = p + aUTF8.Length(); PRUint32 u16Offset = 0; static PangoLanguage* language = pango_language_from_string("en"); while (p < end) { PangoLogAttr* attr = attrBuffer.Elements(); pango_get_log_attrs(p, end - p, -1, language, attr, attrBuffer.Length()); while (p < end) { aBreakBefore[u16Offset] = attr->is_line_break; if (NS_IS_LOW_SURROGATE(aText[u16Offset])) aBreakBefore[++u16Offset] = PR_FALSE; // Skip high surrogate ++u16Offset; PRBool err; PRUint32 ch = UTF8CharEnumerator::NextChar(&p, end, &err); ++attr; if (ch == 0 || err) { // pango_break (pango 1.16.2) only analyses text before the // first NUL (but sets one extra attr). Workaround loop to call // pango_break again to analyse after the NUL is done somewhere else // (gfx/thebes/gfxPangoFonts.cpp: SetupClusterBoundaries()). // So, we do the same here for pango_get_log_attrs. break; } } } }
/*
 * Builds a statistics report (bytes, lines, words, chars, non-space chars)
 * for DOC's current selection or, when nothing is selected, for its whole
 * buffer.
 *
 * Does nothing when DOC or its text buffer is NULL, when no text can be
 * obtained, or when the text is not valid UTF-8.
 *
 * NOTE(review): the report string is assembled and then freed without being
 * shown or returned anywhere in this block -- confirm whether an output call
 * is missing.
 */
void make_stats (t_note_page *doc)
{
  if (! doc)
     return;

  if (! doc->text_buffer)
     return;

  gboolean selected = TRUE;
  gchar *text;
  PangoLogAttr *attrs;

  gint words = 0;
  gint chars = 0;
  gint white_chars = 0;
  gint lines = 0;
  gint bytes = 0;
  gint i;

  selected = doc_is_sel (GTK_TEXT_BUFFER(doc->text_buffer));

  if (! selected)
     text = doc_get_buf (GTK_TEXT_BUFFER(doc->text_buffer));
  else
      text = doc_get_sel (doc);

  if (! text)
     return;

  if (! g_utf8_validate (text, -1, NULL))
     {
      /* text is owned here (it is g_free'd on the normal path), so release
         it on this early exit as well instead of leaking it. */
      g_free (text);
      return;
     }

  lines = gtk_text_buffer_get_line_count (GTK_TEXT_BUFFER(doc->text_buffer));
  chars = g_utf8_strlen (text, -1);

  /* One entry per character plus one for the end-of-text position. */
  attrs = g_new0 (PangoLogAttr, chars + 1);

  pango_get_log_attrs (text, -1, 0, pango_language_from_string ("C"), attrs, chars + 1);

  i = 0;

  while (i < chars)
        {
         if (attrs [i].is_white)
            ++white_chars;

         if (attrs [i].is_word_start)
            ++words;

         ++i;
        }

  if (chars == 0)
     lines = 0;

  bytes = strlen (text);

  gchar *s_bytes = g_strdup_printf(_("bytes: %d\n"), bytes);
  gchar *s_lines = g_strdup_printf(_("lines: %d\n"), lines);
  gchar *s_words = g_strdup_printf(_("words: %d\n"), words);
  gchar *s_chars = g_strdup_printf(_("chars: %d\n"), chars);
  gchar *s_charsnsp = g_strdup_printf(_("chars non-space: %d\n"), chars - white_chars);

  gchar *result;

  /* The line count describes the whole buffer, so it is only included when
     the report covers the whole buffer. */
  if (! selected)
     result = g_strconcat (_("stats for "), doc->file_name, ":\n", s_charsnsp, s_chars, s_words, s_lines, s_bytes, NULL);
  else
      result = g_strconcat (_("stats for selected:\n"), s_charsnsp, s_chars, s_words, s_bytes, NULL);

  g_free (s_bytes);
  g_free (s_charsnsp);
  g_free (s_chars);
  g_free (s_words);
  g_free (s_lines);
  g_free (result);
  g_free (attrs);
  g_free (text);
}
/*
 * Measures the width of text when rendered with font.
 *
 * Without wordWrap, the whole text is laid out and its width is returned.
 *
 * With wordWrap, the text is consumed line-break opportunity by line-break
 * opportunity (as reported by Pango) until adding the next piece would
 * exceed maxWidth. The returned item then holds the part that fits, its
 * width, and the width of the piece that did not fit. If everything fits,
 * the full text and its re-measured width are returned.
 */
WTextItem FontSupport::measureText(const WFont& font, const WString& text, double maxWidth, bool wordWrap)
{
  PANGO_LOCK;

  enabledFontFormats = enabledFontFormats_;

  /*
   * Note: accurate measuring on a bitmap requires that the transformation
   * is applied, because hinting may push chars to boundaries e.g. when
   * rotated (or scaled too?)
   */
  std::string utf8 = text.toUTF8();
  const char *s = utf8.c_str();

  if (wordWrap) {
    int utflen = g_utf8_strlen(s, -1);

    /*
     * One entry per character plus one for the end-of-text position.
     * Owned by a vector so the buffer is released on every exit path,
     * including an exception thrown by the nested measureText() calls.
     */
    std::vector<PangoLogAttr> attrs(utflen + 1);
    PangoLanguage *language = pango_language_from_string("en-US");

    pango_get_log_attrs(s, utf8.length(), -1, language, attrs.data(), utflen + 1);

    double w = 0, nextW = -1;

    int current = 0;   // byte offset of the last accepted break position
    int measured = 0;  // byte offset up to which the width was accumulated
    int end = 0;       // character index just past the last non-white char

    bool maxWidthReached = false;

    for (int i = 0; i < utflen + 1; ++i) {
      if (i == utflen || attrs[i].is_line_break) {
        // Measure up to the last non-white character before this break.
        int cend = g_utf8_offset_to_pointer(s, end) - s;

        WTextItem ti = measureText(font, WString::fromUTF8(utf8.substr(measured, cend - measured)), -1, false);

        if (isEpsilonMore(w + ti.width(), maxWidth)) {
          nextW = ti.width();
          maxWidthReached = true;
          break;
        } else {
          measured = cend;
          current = g_utf8_offset_to_pointer(s, i) - s;
          w += ti.width();

          if (i == utflen) {
            // Account for whatever trails the last non-white character.
            w += measureText(font, WString::fromUTF8(utf8.substr(measured)), -1, false).width();
            measured = utf8.length();
          }
        }
      }

      if (!attrs[i].is_white)
        end = i + 1;
    }

    if (maxWidthReached) {
      return WTextItem(WString::fromUTF8(utf8.substr(0, current)), w, nextW);
    } else {
      /*
       * For some reason, the sum of the individual widths is a bit less
       * (for longer stretches of text), so we re-measure it !
       */
      w = measureText(font, WString::fromUTF8(utf8.substr(0, measured)), -1, false).width();
      return WTextItem(text, w);
    }
  } else {
    std::vector<PangoGlyphString *> glyphs;
    int width;

    GList *items = layoutText(font, utf8, glyphs, width);

    double w = pangoUnitsToDouble(width);

    // Only the width is needed; release the layout results right away.
    for (unsigned i = 0; i < glyphs.size(); ++i)
      pango_glyph_string_free(glyphs[i]);

    g_list_foreach(items, (GFunc) pango_item_free, nullptr);
    g_list_free(items);

    return WTextItem(text, w);
  }
}
/**
 * Interactive-search comparison callback for the index tree.
 *
 * Restarts the sidebar's 2 second search timeout, then compares the
 * normalized, case-folded key against the normalized, case-folded
 * DOCUMENT_LINKS_COLUMN_NAME value of the row at iter.
 *
 * @return FALSE if the row text, or a word inside it, starts with the key;
 *         TRUE otherwise, and also when the row has no text
 */
gboolean SidebarIndexPage::treeSearchFunction(GtkTreeModel* model, gint column, const gchar* key, GtkTreeIter* iter, SidebarIndexPage* sidebar)
{
	XOJ_CHECK_TYPE_OBJ(sidebar, SidebarIndexPage);

	if (sidebar->searchTimeout)
	{
		g_source_remove(sidebar->searchTimeout);
		sidebar->searchTimeout = 0;
	}

	sidebar->searchTimeout = g_timeout_add_seconds_full(G_PRIORITY_DEFAULT_IDLE, 2, (GSourceFunc) searchTimeoutFunc, sidebar, NULL);

	// Source: Pidgin
	gchar* enteredstring;
	gchar* tmp;
	gchar* text;
	gchar* normalized;
	gboolean result;
	size_t i;
	size_t len;
	PangoLogAttr* log_attrs;
	gchar* word;

	gtk_tree_model_get(model, iter, DOCUMENT_LINKS_COLUMN_NAME, &text, -1);
	if (text == NULL)
	{
		return TRUE;
	}

	tmp = g_utf8_normalize(key, -1, G_NORMALIZE_DEFAULT);
	enteredstring = g_utf8_casefold(tmp, -1);
	g_free(tmp);

	tmp = g_utf8_normalize(text, -1, G_NORMALIZE_DEFAULT);
	normalized = g_utf8_casefold(tmp, -1);
	g_free(tmp);

	// gtk_tree_model_get() hands back a copy of the string; only the
	// normalized form is needed from here on, so release the copy now.
	g_free(text);

	if (g_str_has_prefix(normalized, enteredstring))
	{
		g_free(enteredstring);
		g_free(normalized);
		return FALSE;
	}

	/* Use Pango to separate by words. */
	len = g_utf8_strlen(normalized, -1);
	log_attrs = g_new(PangoLogAttr, len + 1);

	pango_get_log_attrs(normalized, strlen(normalized), -1, NULL, log_attrs, len + 1);

	word = normalized;
	result = TRUE;
	// "i + 1 < len" visits the same positions as "i < len - 1" but does not
	// wrap around to a huge bound when the text is empty (len is unsigned).
	for (i = 0; i + 1 < len; i++)
	{
		if (log_attrs[i].is_word_start && g_str_has_prefix(word, enteredstring))
		{
			result = FALSE;
			break;
		}
		word = g_utf8_next_char(word);
	}

	g_free(log_attrs);
	g_free(enteredstring);
	g_free(normalized);

	return result;
}
static void dump_text (const char *text) { int len; PangoLogAttr *attrs; int i; gunichar *ucs4; if (!g_utf8_validate (text, -1, NULL)) fail ("Invalid UTF-8 in file"); len = g_utf8_strlen (text, -1); attrs = g_new0 (PangoLogAttr, len + 1); pango_get_log_attrs (text, -1, 0, pango_language_from_string ("C"), attrs, len + 1); ucs4 = g_utf8_to_ucs4 (text, -1, NULL, NULL, NULL); i = 0; while (i <= len) { char buf[7] = { '\0', }; char *loc; g_unichar_to_utf8 (ucs4[i], buf); if (*buf == '\n') loc = g_strdup ("\\n"); else if (*buf == '\r') loc = g_strdup ("\\r"); else loc = g_locale_from_utf8 (buf, -1, NULL, NULL, NULL); g_print (CHFORMAT " (%s):\t line_break = %d mandatory_break = %d char_break = %d\n" " \t\t white = %d cursor_position = %d\n" " \t\t word_start = %d word_end = %d\n" " \t\t sentence_boundary = %d sentence_start = %d sentence_end = %d\n", ucs4[i], loc ? loc : "?", attrs[i].is_line_break, attrs[i].is_mandatory_break, attrs[i].is_char_break, attrs[i].is_white, attrs[i].is_cursor_position, attrs[i].is_word_start, attrs[i].is_word_end, attrs[i].is_sentence_boundary, attrs[i].is_sentence_start, attrs[i].is_sentence_end); g_free (loc); ++i; } g_free (ucs4); g_free (attrs); }