/** Kolorowanie słów. Słowa nie znajdujące się w słowniku - potencjalnie błędne - zostają oznaczone na czerwono. */ static void show_errors() { GtkTextIter start, end, text_end; int i, range; char *word; gunichar *wword; gtk_text_buffer_create_tag(editor_buf, "red_fg", "foreground", "red", "weight", PANGO_WEIGHT_BOLD, NULL); gtk_text_buffer_get_end_iter(editor_buf, &text_end); gtk_text_buffer_get_start_iter(editor_buf, &end); range = gtk_text_buffer_get_char_count(editor_buf); while (!gtk_text_iter_is_end(&end)) { gtk_text_iter_forward_word_end(&end); start = end; gtk_text_iter_backward_word_start(&start); word = gtk_text_iter_get_text(&start, &end); wword = g_utf8_to_ucs4_fast(word, -1, NULL); if (make_lowercase(wword)) { if (!dictionary_find(dict, wword)) gtk_text_buffer_apply_tag_by_name(editor_buf, "red_fg", &start, &end); } g_free(word); g_free(wword); } }
/*
 * Report whether at least one charmap of `face` has a glyph for every
 * character of the UTF-8 string `text`.
 *
 * Each charmap is selected in turn with FT_Set_Charmap(); the search stops at
 * the first charmap that covers the whole string, and that charmap stays
 * selected on the face. Returns FALSE when no charmap covers the text,
 * including the case of a face without any charmaps.
 */
static gboolean
check_font_contain_text (FT_Face face, const gchar *text)
{
  gunichar *string;
  glong len, idx, map;
  FT_CharMap charmap;
  /* Must start as FALSE: with zero charmaps the loop body never runs and the
   * value would otherwise be returned uninitialized. */
  gboolean retval = FALSE;

  string = g_utf8_to_ucs4_fast (text, -1, &len);

  for (map = 0; map < face->num_charmaps; map++)
    {
      charmap = face->charmaps[map];
      FT_Set_Charmap (face, charmap);

      retval = TRUE;

      for (idx = 0; idx < len; idx++)
        {
          gunichar c = string[idx];

          /* Glyph index 0 means "missing glyph" in this charmap. */
          if (!FT_Get_Char_Index (face, c))
            {
              retval = FALSE;
              break;
            }
        }

      if (retval)
        break;
    }

  g_free (string);

  return retval;
}
/*
 * Report whether `face` has a glyph for every character of the UTF-8 string
 * `text`, looking characters up through the face's Unicode charmap.
 */
static gboolean
check_font_contain_text (FT_Face face, const gchar *text)
{
  glong n_chars;
  glong pos;
  gunichar *codepoints;

  codepoints = g_utf8_to_ucs4_fast (text, -1, &n_chars);

  FT_Select_Charmap (face, FT_ENCODING_UNICODE);

  /* Stop at the first character the font cannot map (glyph index 0). */
  for (pos = 0; pos < n_chars; pos++)
    if (FT_Get_Char_Index (face, codepoints[pos]) == 0)
      break;

  g_free (codepoints);

  /* The scan reached the end only if every character had a glyph. */
  return pos == n_chars;
}
/* Internal function: convert a null-terminated UTF-8 string to a
 * null-terminated UCS4 string. If items_written is not NULL it will be filled
 * with the number of code points returned, not counting the terminator; it is
 * left untouched when the conversion fails. The returned string must be freed
 * afterwards. Returns NULL on error (NULL or malformed input), after emitting
 * a warning. */
gunichar *
convert_utf8_to_ucs4(const gchar *s, glong *items_written)
{
	/* Use the validating converter: the "_fast" variant assumes valid input
	 * and cannot report malformed UTF-8, which made the error branch below
	 * unreachable for bad data. The validating converter does not accept a
	 * NULL string, so that case is handled here. */
	gunichar *retval = (s != NULL)
		? g_utf8_to_ucs4(s, -1, NULL, items_written, NULL)
		: NULL;

	if(retval == NULL)
		WARNING_S("Error during utf8->unicode conversion of string", s);

	return retval;
}
/**
 * Search the page's text for every occurrence of `text`.
 *
 * Returns a GList of newly allocated XojPopplerRectangle objects, in the
 * order the matches were found. The first search starts at the top of the
 * page; each following search resumes right after the previous hit.
 */
GList* XojPopplerPage::findText(string& text)
{
	XOJ_CHECK_TYPE(XojPopplerPage);

	initTextPage();

	glong codepointCount;
	gunichar* codepoints = g_utf8_to_ucs4_fast(text.c_str(), -1, &codepointCount);

	GList* found = NULL;
	double xMin = 0;
	double yMin = 0;
	double xMax;
	double yMax;

	// Argument order of the underlying findText call:
	//   string, length,
	//   startAtTop, stopAtBottom, startAtLast, stopAtLast,
	//   caseSensitive, backward, wholeWord,
	//   &xMin, &yMin, &xMax, &yMax
	for (bool fromTop = true;
	     this->text->findText(codepoints, codepointCount,
	                          fromTop, true, !fromTop, false,
	                          false, false, false,
	                          &xMin, &yMin, &xMax, &yMax);
	     fromTop = false)
	{
		XojPopplerRectangle* hit = new XojPopplerRectangle();
		hit->x1 = xMin;
		hit->y1 = yMax;
		hit->x2 = xMax;
		hit->y2 = yMin;

		// Prepend now, reverse once at the end.
		found = g_list_prepend(found, hit);
	}

	g_free(codepoints);

	return g_list_reverse(found);
}
/*
 * Benchmark body: convert the first `len` bytes of `str` to UCS-4 and free
 * the result, NUM_ITERATIONS times. Always returns 0.
 */
static int
grind_utf8_to_ucs4_fast_sized (const char *str, gsize len)
{
  int remaining = NUM_ITERATIONS;

  while (remaining-- > 0)
    {
      gunichar *converted = g_utf8_to_ucs4_fast (str, len, NULL);

      g_free (converted);
    }

  return 0;
}
// Formats the first code point of "character" as "U+XXXX": upper-case
// hexadecimal, zero-padded to at least four digits.
ustring character_to_hexadecimal_entity(const ustring & character)
{
  gunichar *codepoints = g_utf8_to_ucs4_fast(character.c_str(), -1, NULL);
  const gunichar first = codepoints[0];
  g_free(codepoints);

  gchar *formatted = g_strdup_printf("U+%04X", first);
  ustring entity(formatted);
  g_free(formatted);

  return entity;
}
/**
 * pango_log2vis_get_embedding_levels:
 * @text:      the text to itemize.
 * @length:    the number of bytes (not characters) to process, or -1
 *             if @text is nul-terminated and the length should be calculated.
 * @pbase_dir: input base direction, and output resolved direction.
 *
 * This will return the bidirectional embedding levels of the input paragraph
 * as defined by the Unicode Bidirectional Algorithm available at:
 *
 *   http://www.unicode.org/reports/tr9/
 *
 * If the input base direction is a weak direction, the direction of the
 * characters in the text will determine the final resolved direction.
 *
 * Return value: a newly allocated array of embedding levels, one item per
 *               character (not byte), that should be freed using g_free.
 *
 * Since: 1.4
 */
guint8 *
pango_log2vis_get_embedding_levels (const gchar    *text,
                                    int             length,
                                    PangoDirection *pbase_dir)
{
  FriBidiCharType fribidi_base_dir;
  guint8 *embedding_levels_list;

  /* Map the Pango direction onto the matching FriBidi base direction. */
  switch (*pbase_dir)
    {
    case PANGO_DIRECTION_LTR:
    case PANGO_DIRECTION_TTB_RTL:
      fribidi_base_dir = FRIBIDI_TYPE_L;
      break;
    case PANGO_DIRECTION_RTL:
    case PANGO_DIRECTION_TTB_LTR:
      fribidi_base_dir = FRIBIDI_TYPE_R;
      break;
    case PANGO_DIRECTION_WEAK_RTL:
      fribidi_base_dir = FRIBIDI_TYPE_WR;
      break;
    case PANGO_DIRECTION_WEAK_LTR:
    case PANGO_DIRECTION_NEUTRAL:
    default:
      fribidi_base_dir = FRIBIDI_TYPE_WL;
      break;
    }

#ifdef FRIBIDI_HAVE_UTF8
  {
    if (length < 0)
      length = strlen (text);
    embedding_levels_list = (guint8 *) fribidi_log2vis_get_embedding_levels_new_utf8 (text, length, &fribidi_base_dir);
  }
#else
  {
    gunichar *text_ucs4;
    /* Must be glong: g_utf8_to_ucs4_fast() writes a glong through its
     * items_written pointer, which overruns an int where long is wider. */
    glong n_chars;

    text_ucs4 = g_utf8_to_ucs4_fast (text, length, &n_chars);
    embedding_levels_list = g_new (guint8, n_chars);
    fribidi_log2vis_get_embedding_levels ((FriBidiChar*)text_ucs4, n_chars,
                                          &fribidi_base_dir,
                                          (FriBidiLevel*)embedding_levels_list);
    g_free (text_ucs4);
  }
#endif

  /* Report the direction FriBidi resolved back to the caller. */
  *pbase_dir = (fribidi_base_dir == FRIBIDI_TYPE_L) ? PANGO_DIRECTION_LTR : PANGO_DIRECTION_RTL;

  return embedding_levels_list;
}
/*
 * Return a newly allocated copy of the first `len` bytes of the UTF-8 string
 * `str` (the whole string when `len` is negative) with every character
 * converted to upper case if `upper` is TRUE, to lower case otherwise.
 * The caller frees the result with g_free().
 */
static gchar *
utf8_case_conv (const gchar *str, gssize len, gboolean upper)
{
  glong n_chars;
  gunichar *chars;
  gunichar *cur;
  gchar *converted;

  chars = g_utf8_to_ucs4_fast (str, (glong) len, &n_chars);

  /* Convert each code point in place. */
  for (cur = chars; cur < chars + n_chars; cur++)
    {
      if (upper)
        *cur = g_unichar_toupper (*cur);
      else
        *cur = g_unichar_tolower (*cur);
    }

  converted = g_ucs4_to_utf8 (chars, n_chars, NULL, NULL, NULL);
  g_free (chars);

  return converted;
}
void script_encode_usfm_file(const ustring & filename) // Encodes a USFM file. The purpose is that the USFM marked are not changed by the script. // The assumption is that numbers are not affected. { // Read the file. Bail out if there's no text. ustring input; { gchar *contents; g_file_get_contents(filename.c_str(), &contents, NULL, NULL); if (!contents) return; input = contents; g_free(contents); } if (input.empty()) return; // Go through the input, changing usfm codes to their numerical equivalent, // and copying data to the output. // E.g. "\id" would become "\_105_100". ustring output; bool within_usfm = false; for (unsigned int i = 0; i < input.length(); i++) { ustring character = input.substr(i, 1); if (within_usfm) if (character == " ") within_usfm = false; if (within_usfm) { gunichar unichar; gunichar *uc; uc = g_utf8_to_ucs4_fast(character.c_str(), -1, NULL); unichar = *uc; g_free(uc); character = "_" + convert_to_string(unichar); } output.append(character); if (character == "\\") within_usfm = true; } // Write the data back to the file. g_file_set_contents(filename.c_str(), output.c_str(), -1, NULL); }
// Koloruje słowa nie ze słownika na czerwono static void ColorMistakes (GtkMenuItem *item, gpointer data) { GtkTextIter start, end, buffEnd; gtk_text_buffer_get_start_iter(editor_buf, &end); //Teraz sztuczką przesuwamy końcowy iterator na koniec ostatniego słowa gtk_text_buffer_get_end_iter(editor_buf, &buffEnd); gtk_text_iter_backward_word_start(&buffEnd); gtk_text_iter_forward_word_end(&buffEnd); gtk_text_buffer_create_tag(editor_buf, "red_fg", "foreground", "red", "weight", PANGO_WEIGHT_BOLD, NULL); // Aktualizacja słownika if(update_actual_dict() < 0) return; start = end; while (!gtk_text_iter_equal(&end, &buffEnd)) { // Inkrementacja zmiennych gtk_text_iter_forward_word_end(&end); start = end; gtk_text_iter_backward_word_start(&start); // Usuwamy etykietkę ze słowa jeśli jest gtk_text_buffer_remove_tag_by_name(editor_buf, "red_fg", &start, &end); // Separujemy słowo char* word = gtk_text_iter_get_text(&start, &end); gunichar* wword = g_utf8_to_ucs4_fast(word, -1, NULL); // Jeśli znaleziono w słowniku to kolorujemy if(!dictionary_find(dict, (const wchar_t*)wword)) gtk_text_buffer_apply_tag_by_name(editor_buf, "red_fg", &start, &end); g_free(word); } }
/*
 * Compute a width-style length of the multibyte string `src`.
 *
 * Each single-byte character counts 1. Each multibyte character counts 1 if
 * its code point lies in U+FF65..U+FF9F (half-width kana) and 2 otherwise.
 * Character boundaries come from mblen() after LC_CTYPE is set to LOCALE.
 *
 * Returns the accumulated length, or 0 if mblen() reports an invalid
 * sequence.
 */
int mbstrlen(char* src)
{
    int i = 0, ch = 0, len = 0;
    gunichar* cpoints;

    setlocale(LC_CTYPE, LOCALE);

    while (src[i] != '\0') {
        /* byte length of the character starting at src[i] */
        if ((ch = mblen(&src[i], MB_CUR_MAX)) < 0)
            return 0;

        if (ch > 1) {
            /* Decode exactly this character. Pass its real byte length:
             * sizeof(src[i]) is sizeof(char), i.e. always 1, and only
             * produced the right code point by accident. */
            cpoints = g_utf8_to_ucs4_fast(&src[i], ch, NULL);

            /*
             * multi byte
             *  true : hankaku kana
             *  false: other
             */
            if (cpoints[0] >= 0xff65 && cpoints[0] <= 0xff9f) {
                len++;
            } else {
                len += 2;
            }
            g_free(cpoints);
        } else {
            len++; /* ascii */
        }

        i += ch; /* seek offset */
    }

    return len;
}
/*
 * Print the code point of every character of `str` to stdout as "\uXXXX"
 * (lower-case hex, at least four digits). Character boundaries come from
 * mblen() under the "ja_JP.UTF-8" LC_CTYPE locale; a byte that mblen()
 * rejects is treated as a one-byte character. Always returns 0.
 */
int get_codepoint(char* str)
{
    size_t i = 0;
    size_t total;

    setlocale(LC_CTYPE, "ja_JP.UTF-8");

    /* Measure once instead of on every loop iteration. */
    total = strlen(str);

    while (i < total) {
        int ch = mblen(&str[i], MB_CUR_MAX);
        /* Bytes consumed by this character; anything mblen() does not
         * report as multibyte advances by a single byte. */
        int nbytes = (ch > 1) ? ch : 1;
        /* Pass the real byte length: sizeof(str[i]) is always 1. */
        gunichar* cpoints = g_utf8_to_ucs4_fast(&str[i], nbytes, NULL);

        fprintf(stdout, "\\u%04x", *cpoints);
        g_free(cpoints);

        i += (size_t)nbytes;
    }

    return 0;
}
/*
 * Shape a run of Khmer text into `glyphs`.
 *
 * The UTF-8 run is converted to UCS-4 and processed one syllable at a time
 * (boundaries come from find_syllable()). For every syllable the glyphs are
 * added to an OpenType buffer in this order: the pre-vowel (or the pre part
 * of a split vowel), coeng + ro if present, an optional dotted circle, and
 * then the remaining characters, each with the feature mask matching its
 * class. After all syllables are added, the ruleset obtained from
 * get_ruleset() (if any) is applied for substitution and positioning and the
 * buffer is written to `glyphs`.
 *
 * `p` tracks the UTF-8 position matching the character being copied, so
 * `p - text` is the byte offset passed as the cluster of each glyph. It is
 * only advanced in the second per-syllable loop; the reordered glyphs written
 * before that loop get the offset of the syllable start.
 *
 * Returns without producing output if an argument check fails or the font
 * face cannot be locked.
 */
static void
khmer_engine_shape (PangoEngineShape *engine,
                    PangoFont *font,
                    const char *text,
                    int length,
                    const PangoAnalysis *analysis,
                    PangoGlyphString *glyphs)
{
  PangoFcFont *fc_font;
  FT_Face face;
  PangoOTRuleset *ruleset;
  PangoOTBuffer *buffer;
  glong n_chars;
  gunichar *wcs;
  const char *p;
  int i;
  glong syllable;
  KhmerCharClass charClass;
  glong cursor = 0;

  g_return_if_fail (font != NULL);
  g_return_if_fail (text != NULL);
  g_return_if_fail (length >= 0);
  g_return_if_fail (analysis != NULL);

  fc_font = PANGO_FC_FONT (font);
  face = pango_fc_font_lock_face (fc_font);
  if (!face)
    return;

  buffer = pango_ot_buffer_new (fc_font);

  wcs = g_utf8_to_ucs4_fast (text, length, &n_chars);

  p = text;
  /* This loop only exits when we reach the end of a run, which may contain
   * several syllables.
   */
  while (cursor < n_chars)
    {
      /* write a pre vowel or the pre part of a split vowel first
       * and look out for coeng + ro. RO is the only vowel of type 2, and
       * therefore the only one that requires saving space before the base.
       */
      glong coengRo = -1; /* There is no Coeng Ro, if found this value will change */

      syllable = find_syllable (wcs, cursor, n_chars);

      for (i = cursor; i < syllable; i += 1)
        {
          charClass = get_char_class (wcs[i]);

          /* if a split vowel, write the pre part. In Khmer the pre part
           * is the same for all split vowels, same glyph as pre vowel C_VOWEL_E
           */
          if (charClass & CF_SPLIT_VOWEL)
            {
              pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_VOWEL_E), pref_p, p - text);
              break; /* there can be only one vowel */
            }

          /* if a vowel with pos before write it out */
          if (charClass & CF_POS_BEFORE)
            {
              pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pref_p, p - text);
              break; /* there can be only one vowel */
            }

          /* look for coeng + ro and remember position
           * works because coeng + ro is always in front of a vowel (if there is a vowel)
           * and because CC_CONSONANT2 is enough to identify it, as it is the only consonant
           * with this flag
           */
          if ((charClass & CF_COENG) && (i + 1 < syllable) &&
              ((get_char_class (wcs[i + 1]) & CF_CLASS_MASK) == CC_CONSONANT2))
            {
              coengRo = i;
            }
        }

      /* write coeng + ro if found */
      if (coengRo > -1)
        {
          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_COENG), pref_p, p - text);
          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_RO), pref_p, p - text);
        }

      /* shall we add a dotted circle?
       * If in the position in which the base should be (first char in the string) there is
       * a character that has the Dotted circle flag (a character that cannot be a base)
       * then write a dotted circle
       */
      if (get_char_class (wcs[cursor]) & CF_DOTTED_CIRCLE)
        {
          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_DOTTED_CIRCLE), default_p, p - text);
        }

      /* copy what is left to the output, skipping before vowels and
       * coeng Ro if they are present
       */
      for (i = cursor; i < syllable; i += 1)
        {
          charClass = get_char_class (wcs[i]);

          /* skip a before vowel, it was already processed */
          if (charClass & CF_POS_BEFORE)
            {
              p = g_utf8_next_char (p);
              continue;
            }

          /* skip coeng + ro, it was already processed
           * (two characters, so both `i` and `p` advance twice in total)
           */
          if (i == coengRo)
            {
              p = g_utf8_next_char (p);
              i += 1;
              p = g_utf8_next_char (p);
              continue;
            }

          switch (charClass & CF_POS_MASK)
            {
            case CF_POS_ABOVE :
              pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), abvf_p, p - text);
              break;

            case CF_POS_AFTER :
              pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pstf_p, p - text);
              break;

            case CF_POS_BELOW :
              pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
              break;

            default:
              /* assign the correct flags to a coeng consonant
               * Consonants of type 3 are tagged as Post forms and those type 1 as below forms
               * (the coeng and the consonant after it are written together, with the same mask)
               */
              if ((charClass & CF_COENG) && i + 1 < syllable)
                {
                  if ((get_char_class (wcs[i + 1]) & CF_CLASS_MASK) == CC_CONSONANT3)
                    {
                      pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pstf_p, p - text);
                      p = g_utf8_next_char (p);
                      i += 1;
                      pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pstf_p, p - text);
                      break;
                    }
                  else
                    {
                      pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
                      p = g_utf8_next_char (p);
                      i += 1;
                      pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
                      break;
                    }
                }

              /* if a shifter is followed by an above vowel change the shifter to below form,
               * an above vowel can have two possible positions i + 1 or i + 3
               * (position i+1 corresponds to unicode 3, position i+3 to Unicode 4)
               * and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two
               * different positions, right after the shifter or after a vowel (Unicode 4)
               */
              if ((charClass & CF_SHIFTER) && (i + 1 < syllable))
                {
                  if (get_char_class (wcs[i + 1]) & CF_ABOVE_VOWEL)
                    {
                      pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
                      break;
                    }
                  if (i + 2 < syllable &&
                      (wcs[i + 1] == C_VOWEL_AA) &&
                      (wcs[i + 2] == C_SIGN_NIKAHIT) )
                    {
                      pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
                      break;
                    }
                  if (i + 3 < syllable && (get_char_class (wcs[i + 3]) & CF_ABOVE_VOWEL) )
                    {
                      pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
                      break;
                    }
                  if (i + 4 < syllable &&
                      (wcs[i + 3] == C_VOWEL_AA) &&
                      (wcs[i + 4] == C_SIGN_NIKAHIT) )
                    {
                      pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
                      break;
                    }
                }

              /* default - any other characters */
              pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), default_p, p - text);
              break;
            } /* switch */

          p = g_utf8_next_char (p);
        } /* for */

      cursor = syllable; /* move the pointer to the start of next syllable */
    } /* while */

  /* do gsub processing (substitution, then positioning), if a ruleset exists */
  ruleset = get_ruleset (face);
  if (ruleset != NULL)
    {
      pango_ot_ruleset_substitute (ruleset, buffer);
      pango_ot_ruleset_position (ruleset, buffer);
    }

  pango_ot_buffer_output (buffer, glyphs);

  /* release everything acquired above */
  g_free (wcs);
  pango_ot_buffer_destroy (buffer);
  pango_fc_font_unlock_face (fc_font);
}
/*
 * Shape a run of Tibetan text into `glyphs`.
 *
 * The UTF-8 run is converted to UCS-4 and processed one syllable at a time
 * (boundaries come from find_syllable()). For each syllable an optional
 * dotted circle is written, then every character is added to an OpenType
 * buffer with the feature mask matching its class; a digit followed by a
 * pre-digit combining mark is written in swapped order. Finally the ruleset
 * obtained from get_ruleset() (if any) is applied for substitution and
 * positioning and the buffer is written to `glyphs`.
 *
 * `p` tracks the UTF-8 position of the character being copied, so `p - text`
 * is the byte offset passed as the cluster of each glyph.
 *
 * Returns without producing output if an argument check fails or the font
 * face cannot be locked.
 */
static void
tibetan_engine_shape (PangoEngineShape *engine,
                      PangoFont *font,
                      const char *text,
                      int length,
                      const PangoAnalysis *analysis,
                      PangoGlyphString *glyphs)
{
  PangoFcFont *fc_font;
  FT_Face face;
  PangoOTRuleset *ruleset;
  PangoOTBuffer *buffer;
  glong n_chars;
  gunichar *wcs;
  const char *p;
  int i;
  glong syllable;
  TibetanCharClass charClass;
  glong cursor = 0;

  g_return_if_fail (font != NULL);
  g_return_if_fail (text != NULL);
  g_return_if_fail (length >= 0);
  g_return_if_fail (analysis != NULL);

  fc_font = PANGO_FC_FONT (font);
  face = pango_fc_font_lock_face (fc_font);
  if (!face)
    return;

  buffer = pango_ot_buffer_new (fc_font);

  wcs = g_utf8_to_ucs4_fast (text, length, &n_chars);

  p = text;
  /* This loop only exits when we reach the end of a run, which may contain
   * several syllables.
   */
  while (cursor < n_chars)
    {
      syllable = find_syllable (wcs, cursor, n_chars);

      /* shall we add a dotted circle?
       * If in the position in which the base should be (first char in the string) there is
       * a character that has the Dotted circle flag (a character that cannot be a base)
       * then write a dotted circle
       */
      if (get_char_class (wcs[cursor]) & CF_DOTTED_CIRCLE)
        {
          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_DOTTED_CIRCLE), default_p, p - text);
        }

      /* If it encounters a digit followed by number pre combining mark, then
       * reorder the two characters: the mark glyph is written first, then the digit.
       */
      for (i = cursor; i < syllable; i += 1)
        {
          charClass = get_char_class (wcs[i]);

          /* NOTE(review): wcs[i+1] is read without checking i + 1 < syllable.
           * At the very end of the run this presumably reads the terminator
           * of the converted string -- confirm, and confirm that
           * find_syllable() keeps a digit and its mark in the same syllable.
           */
          if ((charClass & CF_DIGIT )
              && ( get_char_class (wcs[i+1]) & CF_PREDIGIT))
            {
              pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_PRE_NUMBER_MARK), pref_p, p - text);
              p = g_utf8_next_char (p);
              pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pref_p, p - text);
              i += 1; /* the mark has been consumed together with the digit */
            }
          else
            {
              switch (charClass & CF_POS_MASK)
                {
                case CF_POS_ABOVE :
                  pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), abvf_p, p - text);
                  break;

                case CF_POS_AFTER :
                  pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pstf_p, p - text);
                  break;

                case CF_POS_BELOW :
                  pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
                  break;

                default:
                  /* default - any other characters */
                  pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), default_p, p - text);
                  break;
                } /* switch */
            }

          p = g_utf8_next_char (p);
        } /* for */

      cursor = syllable; /* move the pointer to the start of next syllable */
    } /* while */

  /* do gsub processing (substitution, then positioning), if a ruleset exists */
  ruleset = get_ruleset (face);
  if (ruleset != NULL)
    {
      pango_ot_ruleset_substitute (ruleset, buffer);
      pango_ot_ruleset_position (ruleset, buffer);
    }

  pango_ot_buffer_output (buffer, glyphs);

  /* release everything acquired above */
  g_free (wcs);
  pango_ot_buffer_destroy (buffer);
  pango_fc_font_unlock_face (fc_font);
}
gchar* rb_search_fold (const char *original) { GString *string; gchar *normalized; gunichar *unicode, *cur; g_return_val_if_fail (original != NULL, NULL); /* old behaviour is equivalent to: return g_utf8_casefold (original, -1); */ string = g_string_new (NULL); normalized = g_utf8_normalize(original, -1, G_NORMALIZE_DEFAULT); unicode = g_utf8_to_ucs4_fast (normalized, -1, NULL); for (cur = unicode; *cur != 0; cur++) { switch (g_unichar_type (*cur)) { case G_UNICODE_COMBINING_MARK: case G_UNICODE_ENCLOSING_MARK: case G_UNICODE_NON_SPACING_MARK: case G_UNICODE_CONNECT_PUNCTUATION: case G_UNICODE_DASH_PUNCTUATION: case G_UNICODE_CLOSE_PUNCTUATION: case G_UNICODE_FINAL_PUNCTUATION: case G_UNICODE_INITIAL_PUNCTUATION: case G_UNICODE_OTHER_PUNCTUATION: case G_UNICODE_OPEN_PUNCTUATION: /* remove these */ break; case G_UNICODE_LOWERCASE_LETTER: case G_UNICODE_MODIFIER_LETTER: case G_UNICODE_OTHER_LETTER: case G_UNICODE_TITLECASE_LETTER: case G_UNICODE_UPPERCASE_LETTER: /* convert to lower case */ *cur = g_unichar_tolower (*cur); /* ... and fall through */\ case G_UNICODE_DECIMAL_NUMBER: case G_UNICODE_LETTER_NUMBER: case G_UNICODE_OTHER_NUMBER: /* should be keep symbols? */ case G_UNICODE_CURRENCY_SYMBOL: case G_UNICODE_MODIFIER_SYMBOL: case G_UNICODE_MATH_SYMBOL: case G_UNICODE_OTHER_SYMBOL: g_string_append_unichar (string, *cur); break; case G_UNICODE_UNASSIGNED: rb_debug ("unassigned unicode character type found"); /* fall through */ default: /* leave these in */ g_string_append_unichar (string, *cur); } } g_free (unicode); g_free (normalized); return g_string_free (string, FALSE); }
/**
 * stringprep_utf8_to_ucs4:
 * @str: a UTF-8 encoded string
 * @len: the maximum length of @str to use. If @len < 0, then
 *       the string is nul-terminated.
 * @items_written: location to store the number of characters in the
 *                 result, or %NULL.
 *
 * Convert a string from UTF-8 to a 32-bit fixed width
 * representation as UCS-4, assuming valid UTF-8 input.
 * This function does no error checking on the input.
 *
 * Return value: a pointer to a newly allocated UCS-4 string.
 *               This value must be freed with free().
 **/
my_uint32_t *
stringprep_utf8_to_ucs4 (const char *str, ssize_t len, size_t * items_written)
{
  /* Adapt the public parameter types to the ones the converter expects. */
  glong max_bytes = (glong) len;
  glong *written = (glong *) items_written;

  return g_utf8_to_ucs4_fast (str, max_bytes, written);
}
static void syriac_engine_shape (PangoEngineShape *engine, PangoFont *font, const char *text, gint length, const PangoAnalysis *analysis, PangoGlyphString *glyphs) { PangoFcFont *fc_font; FT_Face face; PangoOTRulesetDescription desc; const PangoOTRuleset *ruleset; PangoOTBuffer *buffer; gulong *properties = NULL; glong n_chars; gunichar *wcs; const char *p; int cluster = 0; int i; g_return_if_fail (font != NULL); g_return_if_fail (text != NULL); g_return_if_fail (length >= 0); g_return_if_fail (analysis != NULL); fc_font = PANGO_FC_FONT (font); face = pango_fc_font_lock_face (fc_font); if (!face) return; buffer = pango_ot_buffer_new (fc_font); pango_ot_buffer_set_rtl (buffer, analysis->level % 2 != 0); pango_ot_buffer_set_zero_width_marks (buffer, TRUE); wcs = g_utf8_to_ucs4_fast (text, length, &n_chars); properties = g_new0 (gulong, n_chars); syriac_assign_properties (wcs, properties, n_chars); g_free (wcs); p = text; for (i=0; i < n_chars; i++) { gunichar wc; PangoGlyph glyph; wc = g_utf8_get_char (p); if (g_unichar_type (wc) != G_UNICODE_NON_SPACING_MARK) cluster = p - text; if (pango_is_zero_width (wc)) glyph = PANGO_GLYPH_EMPTY; else { gunichar c = wc; if (analysis->level % 2) g_unichar_get_mirror_char (c, &c); glyph = pango_fc_font_get_glyph (fc_font, c); } if (!glyph) glyph = PANGO_GET_UNKNOWN_GLYPH (wc); pango_ot_buffer_add_glyph (buffer, glyph, properties[i], cluster); p = g_utf8_next_char (p); } g_free (properties); desc.script = analysis->script; desc.language = analysis->language; desc.n_static_gsub_features = G_N_ELEMENTS (gsub_features); desc.static_gsub_features = gsub_features; desc.n_static_gpos_features = G_N_ELEMENTS (gpos_features); desc.static_gpos_features = gpos_features; /* TODO populate other_features from analysis->extra_attrs */ desc.n_other_features = 0; desc.other_features = NULL; ruleset = pango_ot_ruleset_get_for_description (pango_ot_info_get (face), &desc); pango_ot_ruleset_substitute (ruleset, buffer); pango_ot_ruleset_position 
(ruleset, buffer); pango_ot_buffer_output (buffer, glyphs); pango_ot_buffer_destroy (buffer); pango_fc_font_unlock_face (fc_font); }
/*
 * Run the conversion checks for one test case.
 *
 * line     - test-case line number, used only in failure messages
 * utf8     - the nul-terminated input bytes
 * status   - the expected classification of `utf8`
 * ucs4     - the expected UCS-4 form (compared for VALID and NOTUNICODE)
 * ucs4_len - the expected number of characters in `ucs4`
 *
 * Every mismatch is reported through fail() and ends the function
 * immediately; buffers allocated up to that point are not released on those
 * early-return paths.
 */
static void
process (gint      line,
         gchar    *utf8,
         Status    status,
         gunichar *ucs4,
         gint      ucs4_len)
{
  const gchar *end;
  gboolean is_valid = g_utf8_validate (utf8, -1, &end);
  GError *error = NULL;
  glong items_read, items_written;

  /* 1. g_utf8_validate() must agree with the expected status. */
  switch (status)
    {
    case VALID:
      if (!is_valid)
        {
          fail ("line %d: valid but g_utf8_validate returned FALSE\n", line);
          return;
        }
      break;
    case NOTUNICODE:
    case INCOMPLETE:
    case OVERLONG:
    case MALFORMED:
      if (is_valid)
        {
          fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line);
          return;
        }
      break;
    }

  /* 2. Incomplete input: without items_read the conversion must fail with
   *    G_CONVERT_ERROR_PARTIAL_INPUT; with items_read it must succeed
   *    without having consumed the whole input.
   */
  if (status == INCOMPLETE)
    {
      gunichar *ucs4_result;

      ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error);

      if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT))
        {
          fail ("line %d: incomplete input not properly detected\n", line);
          return;
        }
      g_clear_error (&error);

      ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error);

      if (!ucs4_result || items_read == strlen (utf8))
        {
          fail ("line %d: incomplete input not properly detected\n", line);
          return;
        }

      g_free (ucs4_result);
    }

  /* 3. UTF-8 -> UCS-4 (checked and "fast" variants) and back to UTF-8. */
  if (status == VALID || status == NOTUNICODE)
    {
      gunichar *ucs4_result;
      gchar *utf8_result;

      ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error);
      if (!ucs4_result)
        {
          fail ("line %d: conversion to ucs4 failed: %s\n", line, error->message);
          return;
        }

      if (!ucs4_equal (ucs4_result, ucs4) ||
          items_read != strlen (utf8) ||
          items_written != ucs4_len)
        {
          fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
          return;
        }

      g_free (ucs4_result);

      ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written);

      if (!ucs4_equal (ucs4_result, ucs4) ||
          items_written != ucs4_len)
        {
          fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
          return;
        }

      utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error);
      if (!utf8_result)
        {
          fail ("line %d: conversion back to utf8 failed: %s", line, error->message);
          return;
        }

      if (strcmp (utf8_result, utf8) != 0 ||
          items_read != ucs4_len ||
          items_written != strlen (utf8))
        {
          fail ("line %d: conversion back to utf8 did not match original\n", line);
          return;
        }

      g_free (utf8_result);
      g_free (ucs4_result);
    }

  /* 4. UTF-16 round trips, compared against what g_convert() produces. */
  if (status == VALID)
    {
      gunichar2 *utf16_expected_tmp;
      gunichar2 *utf16_expected;
      gunichar2 *utf16_from_utf8;
      gunichar2 *utf16_from_ucs4;
      gunichar *ucs4_result;
      gsize bytes_written;
      gint n_chars;
      gchar *utf8_result;

      /* Target encoding name handed to g_convert(), chosen by host byte order. */
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
#define TARGET "UTF-16LE"
#else
#define TARGET "UTF-16"
#endif

      if (!(utf16_expected_tmp = (gunichar2 *)g_convert (utf8, -1, TARGET, "UTF-8",
                                                         NULL, &bytes_written, NULL)))
        {
          fail ("line %d: could not convert to UTF-16 via g_convert\n", line);
          return;
        }

      /* zero-terminate and remove BOM */
      n_chars = bytes_written / 2;
      if (utf16_expected_tmp[0] == 0xfeff) /* BOM */
        {
          n_chars--;
          utf16_expected = g_new (gunichar2, n_chars + 1);
          memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars);
        }
      else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */
        {
          fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line);
          return;
        }
      else
        {
          utf16_expected = g_new (gunichar2, n_chars + 1);
          memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars);
        }

      utf16_expected[n_chars] = '\0';

      /* UTF-8 -> UTF-16 */
      if (!(utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error)))
        {
          fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
          return;
        }

      if (items_read != strlen (utf8) ||
          utf16_count (utf16_from_utf8) != items_written)
        {
          fail ("line %d: length error in conversion to ucs16\n", line);
          return;
        }

      /* UCS-4 -> UTF-16 */
      if (!(utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error)))
        {
          fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
          return;
        }

      if (items_read != ucs4_len ||
          utf16_count (utf16_from_ucs4) != items_written)
        {
          fail ("line %d: length error in conversion to ucs16\n", line);
          return;
        }

      /* Both UTF-16 results must equal the g_convert() reference. */
      if (!utf16_equal (utf16_from_utf8, utf16_expected) ||
          !utf16_equal (utf16_from_ucs4, utf16_expected))
        {
          fail ("line %d: results of conversion to ucs16 do not match\n", line);
          return;
        }

      /* UTF-16 -> UTF-8 */
      if (!(utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error)))
        {
          fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message);
          return;
        }

      if (items_read != utf16_count (utf16_from_utf8) ||
          items_written != strlen (utf8))
        {
          fail ("line %d: length error in conversion from ucs16 to utf8\n", line);
          return;
        }

      /* UTF-16 -> UCS-4 */
      if (!(ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error)))
        {
          fail ("line %d: conversion back to utf8/ucs4 failed\n", line);
          return;
        }

      if (items_read != utf16_count (utf16_from_utf8) ||
          items_written != ucs4_len)
        {
          fail ("line %d: length error in conversion from ucs16 to ucs4\n", line);
          return;
        }

      /* Both round trips must reproduce the original data. */
      if (strcmp (utf8, utf8_result) != 0 ||
          !ucs4_equal (ucs4, ucs4_result))
        {
          fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line);
          return;
        }

      g_free (utf16_expected_tmp);
      g_free (utf16_expected);
      g_free (utf16_from_utf8);
      g_free (utf16_from_ucs4);
      g_free (utf8_result);
      g_free (ucs4_result);
    }
}
/**
  Check the word the cursor is currently on and, if it is not in the
  dictionary, offer replacement hints or adding the word to the dictionary.
  @param[in] item menu item (unused).
  @param[in] data user data pointer (unused).
 */
static void WhatCheck (GtkMenuItem *item, gpointer data)
{
  GtkWidget *dialog;
  GtkTextIter start, end;
  char *word;
  gunichar *wword;

  // Find the cursor position
  gtk_text_buffer_get_iter_at_mark(editor_buf, &start,
                                   gtk_text_buffer_get_insert(editor_buf));

  // Not inside a word: report it and stop
  if (!gtk_text_iter_inside_word(&start)) {
    dialog = gtk_message_dialog_new(NULL, 0, GTK_MESSAGE_ERROR,
                                    GTK_BUTTONS_OK,
                                    "Kursor musi być w środku słowa");
    gtk_dialog_run(GTK_DIALOG(dialog));
    gtk_widget_destroy(dialog);
    return;
  }

  // Find the start and the end of the word, then the word itself
  end = start;
  gtk_text_iter_backward_word_start(&start);
  gtk_text_iter_forward_word_end(&end);
  word = gtk_text_iter_get_text(&start, &end);

  // Convert to UCS-4, which is handed to the dictionary as wide characters
  wword = g_utf8_to_ucs4_fast(word, -1, NULL);

  if (!make_lowercase(wword)) {
    // make_lowercase() rejected the word
    dialog = gtk_message_dialog_new(NULL, 0, GTK_MESSAGE_INFO, GTK_BUTTONS_OK,
                                    "Podane słowo nie jest słowem słownikowym.");
    gtk_dialog_run(GTK_DIALOG(dialog));
    gtk_widget_destroy(dialog);
  }
  else {
    // Look the word up
    if (dictionary_find(dict, (wchar_t *)wword)) {
      dialog = gtk_message_dialog_new(NULL, 0, GTK_MESSAGE_INFO, GTK_BUTTONS_OK,
                                      "Wszystko w porządku,\nśpij spokojnie");
      gtk_dialog_run(GTK_DIALOG(dialog));
      gtk_widget_destroy(dialog);
    }
    else {
      // Time for a correction
      GtkWidget *vbox, *label, *combo;
      struct word_list hints;
      int i;
      wchar_t **words;

      dictionary_hints(dict, (wchar_t *)wword, &hints);
      words = (wchar_t **) word_list_get(&hints);
      dialog = gtk_dialog_new_with_buttons("Korekta", NULL, 0,
                                           GTK_STOCK_OK,
                                           GTK_RESPONSE_ACCEPT,
                                           GTK_STOCK_ADD,
                                           GTK_RESPONSE_APPLY,
                                           GTK_STOCK_CANCEL,
                                           GTK_RESPONSE_REJECT,
                                           NULL);
      // The dialog content has two elements
      vbox = gtk_dialog_get_content_area(GTK_DIALOG(dialog));
      // The explanatory text
      label = gtk_label_new("Słowo nie znajduje się w słowniku. Wybierz \njedną z propozycji lub dodaj słowa do słownika.");
      gtk_widget_show(label);
      gtk_box_pack_start(GTK_BOX(vbox), label, FALSE, FALSE, 1);

      // The drop-down list of hints
      combo = gtk_combo_box_text_new();
      for (i = 0; i < word_list_size(&hints); i++) {
        // The combo box wants UTF-8
        char *uword = g_ucs4_to_utf8((gunichar *)words[i], -1, NULL, NULL, NULL);

        // Append the next entry
        gtk_combo_box_text_append_text(GTK_COMBO_BOX_TEXT(combo), uword);
        g_free(uword);
      }
      gtk_combo_box_set_active(GTK_COMBO_BOX(combo), 0);
      gtk_box_pack_start(GTK_BOX(vbox), combo, FALSE, FALSE, 1);
      gtk_widget_show(combo);

      gint click = gtk_dialog_run(GTK_DIALOG(dialog));

      if (click == GTK_RESPONSE_ACCEPT) {
        char *korekta =
          gtk_combo_box_text_get_active_text(GTK_COMBO_BOX_TEXT(combo));

        // With no hints the combo box is empty and there is no active text;
        // keep the original word instead of deleting it and inserting nothing.
        if (korekta != NULL) {
          // Remove the old word
          gtk_text_buffer_delete(editor_buf, &start, &end);
          // Insert the replacement
          gtk_text_buffer_insert(editor_buf, &start, korekta, -1);
          g_free(korekta);
        }
      }
      // The user chose to add the word to the dictionary
      else if (click == GTK_RESPONSE_APPLY)
        dictionary_insert(dict, (wchar_t *)wword);

      gtk_widget_destroy(dialog);
    }
  }
  g_free(word);
  g_free(wword);
}
/* Hi, this method sucks and is very much untested. However, it did * actually render one utf-8 persian post i looked at, so its still * better than just using g_strescape. * * If you have any experience with non us-english languages, you should * look this over and make it more robust to corner cases. * * Cheers, -- Christian */ static gchar* json_strescape (const gchar *source) { gchar *dest, *q; gunichar *ucs4; gint i, len; if (!g_utf8_validate (source, -1, NULL)) return g_strescape (source, NULL); len = g_utf8_strlen (source, -1); dest = q = g_malloc (len * 6 + 1); ucs4 = g_utf8_to_ucs4_fast (source, -1, NULL); for (i = 0; i < len; i++) { switch (ucs4 [i]) { case '\\': *q++ = '\\'; *q++ = '\\'; break; case '"': *q++ = '\\'; *q++ = '"'; break; case '\b': *q++ = '\\'; *q++ = 'b'; break; case '\f': *q++ = '\\'; *q++ = 'f'; break; case '\n': *q++ = '\\'; *q++ = 'n'; break; case '\r': *q++ = '\\'; *q++ = 'r'; break; case '\t': *q++ = '\\'; *q++ = 't'; break; default: if ((ucs4 [i] >= (gunichar)0x20) || (ucs4 [i] <= (gunichar)0x1F)) { g_sprintf (q, "\\u%04x", ucs4 [i]); q += 6; } else *q++ = ((gchar)ucs4 [i]); } } *q++ = 0; g_free (ucs4); return dest; }
// Checks the word the cursor is currently on. If it is not in the dictionary,
// offers replacement hints; if the user cancels the hint dialog, asks whether
// the word should be added to the dictionary and saved.
// Menu callback; "item" and "data" are not used.
static void WhatCheck (GtkMenuItem *item, gpointer data)
{
  GtkWidget *dialog;
  GtkTextIter start, end;
  char *word;
  gunichar *wword;

  // Find the cursor position
  gtk_text_buffer_get_iter_at_mark(editor_buf, &start,
                                   gtk_text_buffer_get_insert(editor_buf));

  // Not inside a word: report it and stop
  if (!gtk_text_iter_inside_word(&start)) {
    dialog = gtk_message_dialog_new(NULL, 0, GTK_MESSAGE_ERROR,
                                    GTK_BUTTONS_OK,
                                    "Kursor musi być w środku słowa");
    gtk_dialog_run(GTK_DIALOG(dialog));
    gtk_widget_destroy(dialog);
    return;
  }

  // Find the start and the end of the word, then the word itself
  end = start;
  gtk_text_iter_backward_word_start(&start);
  gtk_text_iter_forward_word_end(&end);
  word = gtk_text_iter_get_text(&start, &end);

  // Convert to UCS-4, which is handed to the dictionary as wide characters
  wword = g_utf8_to_ucs4_fast(word, -1, NULL);

  // Refresh the dictionary; on failure release the two copies and give up
  if (update_actual_dict() < 0)
    goto out;

  // Look the word up
  if (dictionary_find(dict, (wchar_t *)wword)) {
    dialog = gtk_message_dialog_new(NULL, 0, GTK_MESSAGE_INFO, GTK_BUTTONS_OK,
                                    "Wszystko w porządku,\nśpij spokojnie");
    gtk_dialog_run(GTK_DIALOG(dialog));
    gtk_widget_destroy(dialog);
  }
  else {
    // Time for a correction
    GtkWidget *vbox, *label, *combo;
    struct word_list hints;
    int i;
    wchar_t **words;
    char *korekta;
    const char *question;
    GtkWidget *ask_dialog, *ask_vbox, *ask_label;

    dictionary_hints(dict, (wchar_t *)wword, &hints);
    words = (wchar_t **) word_list_get(&hints);
    dialog = gtk_dialog_new_with_buttons("Korekta", NULL, 0,
                                         GTK_STOCK_OK,
                                         GTK_RESPONSE_ACCEPT,
                                         GTK_STOCK_CANCEL,
                                         GTK_RESPONSE_REJECT,
                                         NULL);
    // The dialog content has two elements
    vbox = gtk_dialog_get_content_area(GTK_DIALOG(dialog));
    // The explanatory text
    label = gtk_label_new("Coś nie tak, mam kilka propozycji");
    gtk_widget_show(label);
    gtk_box_pack_start(GTK_BOX(vbox), label, FALSE, FALSE, 1);

    // The drop-down list of hints
    combo = gtk_combo_box_text_new();
    for (i = 0; i < word_list_size(&hints); i++) {
      // The combo box wants UTF-8
      char *uword = g_ucs4_to_utf8((gunichar *)words[i], -1, NULL, NULL, NULL);

      // Append the next entry
      gtk_combo_box_text_append_text(GTK_COMBO_BOX_TEXT(combo), uword);
      g_free(uword);
    }
    gtk_combo_box_set_active(GTK_COMBO_BOX(combo), 0);
    gtk_box_pack_start(GTK_BOX(vbox), combo, FALSE, FALSE, 1);
    gtk_widget_show(combo);

    switch (gtk_dialog_run(GTK_DIALOG(dialog))) {
      case GTK_RESPONSE_ACCEPT:
        korekta =
          gtk_combo_box_text_get_active_text(GTK_COMBO_BOX_TEXT(combo));

        // With no hints the combo box is empty and there is no active text;
        // keep the original word instead of deleting it and inserting nothing.
        if (korekta != NULL) {
          // Remove the old word
          gtk_text_buffer_delete(editor_buf, &start, &end);
          // Insert the replacement
          gtk_text_buffer_insert(editor_buf, &start, korekta, -1);
          g_free(korekta);
        }
        break;

      case GTK_RESPONSE_REJECT:
        question = "Czy chcesz dodać to słowo do słownika?";
        ask_dialog = gtk_dialog_new_with_buttons(question, NULL, 0,
                                                 GTK_STOCK_OK,
                                                 GTK_RESPONSE_ACCEPT,
                                                 GTK_STOCK_CANCEL,
                                                 GTK_RESPONSE_REJECT,
                                                 NULL);
        // The label belongs to the confirmation dialog, not to the hint
        // dialog it was previously packed into.
        ask_vbox = gtk_dialog_get_content_area(GTK_DIALOG(ask_dialog));
        // NOTE(review): this label repeats the hint dialog's text; it looks
        // like a copy-paste and probably should show the question -- confirm.
        ask_label = gtk_label_new("Coś nie tak, mam kilka propozycji");
        gtk_widget_show(ask_label);
        gtk_box_pack_start(GTK_BOX(ask_vbox), ask_label, FALSE, FALSE, 1);

        // If the user wants the word in the dictionary, add it and save
        if (gtk_dialog_run(GTK_DIALOG(ask_dialog)) == GTK_RESPONSE_ACCEPT) {
          dictionary_insert(dict, (wchar_t *)wword);
          dictionary_save_lang(dict, dict_location);
        }

        gtk_widget_destroy(ask_dialog);
        break;

      default:
        // Dialog closed some other way: nothing to do
        break;
    }
    gtk_widget_destroy(dialog);
  }

out:
  // Both copies of the word are heap-allocated and were never released.
  g_free(word);
  g_free(wword);
}