/*
 * Compare two UTF-16 code units for the invariant culture.
 *
 * CompareOptions_Ordinal: returns the raw difference c1 - c2. Ordinal can
 * not be mixed with other options, and callers need the magnitude of the
 * difference, not only its sign.
 *
 * Otherwise the result is reduced to -1, 0 or 1. With
 * CompareOptions_IgnoreCase both code units are folded to lower case
 * before being compared.
 */
static gint32
string_invariant_compare_char (gunichar2 c1, gunichar2 c2, gint32 options)
{
	gint32 diff;

	if (options & CompareOptions_Ordinal)
		return (gint32) c1 - c2;

	if (options & CompareOptions_IgnoreCase) {
		/* Only fold characters that are not already lower case. */
		const gunichar folded1 = (g_unichar_type (c1) == G_UNICODE_LOWERCASE_LETTER)
			? c1 : g_unichar_tolower (c1);
		const gunichar folded2 = (g_unichar_type (c2) == G_UNICODE_LOWERCASE_LETTER)
			? c2 : g_unichar_tolower (c2);

		diff = (gint32) folded1 - folded2;
	} else {
		/*
		 * No options. Kana, symbol and spacing options don't
		 * apply to the invariant culture.
		 *
		 * FIXME: the Unicode category of both characters should be
		 * used to find the proper collation; even for the
		 * InvariantCulture sorting is not done on the raw unicode
		 * values but on their actual sort order.
		 */
		diff = (gint32) c1 - c2;
	}

	/* Collapse to the sign of the difference. */
	return (diff > 0) - (diff < 0);
}
/*
 * Return 1 when the UTF-8 string `word` (`len` bytes long) is title-cased:
 * its first character is an uppercase or titlecase letter that equals its
 * own titlecase mapping, and no later character is an uppercase or
 * titlecase letter. Returns 0 otherwise, including for a NULL or empty word.
 */
static int
enchant_is_title_case(const char*const word, size_t len)
{
	const char *cursor;
	const char *end;
	gunichar first;
	GUnicodeType kind;

	g_return_val_if_fail (word && *word, 0);

	first = g_utf8_get_char(word);
	kind = g_unichar_type(first);
	if (!(kind == G_UNICODE_UPPERCASE_LETTER || kind == G_UNICODE_TITLECASE_LETTER))
		return 0;
	if (g_unichar_totitle(first) != first)
		return 0;

	/* Every remaining character must not be upper/title case. */
	end = word + len;
	cursor = g_utf8_next_char(word);
	while (cursor < end) {
		kind = g_unichar_type(g_utf8_get_char(cursor));
		if (kind == G_UNICODE_UPPERCASE_LETTER || kind == G_UNICODE_TITLECASE_LETTER)
			return 0;
		cursor = g_utf8_next_char(cursor);
	}

	return 1;
}
/* * g_unichar_type */ RESULT test_g_unichar_type () { if (g_unichar_type ('A') != G_UNICODE_UPPERCASE_LETTER) return FAILED ("#1"); if (g_unichar_type ('a') != G_UNICODE_LOWERCASE_LETTER) return FAILED ("#2"); if (g_unichar_type ('1') != G_UNICODE_DECIMAL_NUMBER) return FAILED ("#3"); if (g_unichar_type (0xA3) != G_UNICODE_CURRENCY_SYMBOL) return FAILED ("#4"); return NULL; }
/*
 * Return 1 if `uni` belongs to one of the three Unicode mark categories
 * (combining/spacing, enclosing, non-spacing), 0 otherwise.
 */
static int
str_unichar_iscombiningmark (gunichar uni)
{
    switch (g_unichar_type (uni))
    {
    case G_UNICODE_COMBINING_MARK:
    case G_UNICODE_ENCLOSING_MARK:
    case G_UNICODE_NON_SPACING_MARK:
        return 1;
    default:
        return 0;
    }
}
/*
 * RFC 3454, Appendix C. ish.
 *
 * Return TRUE when `ch` must not appear in an IDNA label: every control,
 * format, unassigned, private-use, surrogate and separator character, plus
 * a handful of individually listed symbols and non-spacing marks.
 */
static inline gboolean
idna_is_prohibited (gunichar ch)
{
  const GUnicodeType category = g_unichar_type (ch);

  /* Categories in which only specific code points are prohibited. */
  if (category == G_UNICODE_OTHER_SYMBOL)
    return ch == 0xFFFC || ch == 0xFFFD || (ch >= 0x2FF0 && ch <= 0x2FFB);

  if (category == G_UNICODE_NON_SPACING_MARK)
    return ch == 0x0340 || ch == 0x0341;

  /* Categories that are prohibited as a whole. */
  switch (category)
    {
    case G_UNICODE_CONTROL:
    case G_UNICODE_FORMAT:
    case G_UNICODE_UNASSIGNED:
    case G_UNICODE_PRIVATE_USE:
    case G_UNICODE_SURROGATE:
    case G_UNICODE_LINE_SEPARATOR:
    case G_UNICODE_PARAGRAPH_SEPARATOR:
    case G_UNICODE_SPACE_SEPARATOR:
      return TRUE;

    default:
      return FALSE;
    }
}
/**
 * stripped_char:
 *
 * Returns a stripped version of @ch: the character is lower-cased (unless
 * it already is a lowercase letter) and the first code point of its
 * canonical decomposition is returned, which drops accents and similar
 * marks. Control, format, unassigned and combining-mark characters yield 0,
 * as does a character for which no decomposition is returned.
 **/
static gunichar
stripped_char (gunichar ch)
{
	const GUnicodeType category = g_unichar_type (ch);
	gunichar *parts;
	gunichar base;
	gsize n_parts;

	/* Characters that are ignored altogether. */
	if (category == G_UNICODE_CONTROL ||
	    category == G_UNICODE_FORMAT ||
	    category == G_UNICODE_UNASSIGNED ||
	    category == G_UNICODE_COMBINING_MARK)
		return 0;

	/* Anything that is not already a lowercase letter is folded first. */
	if (category != G_UNICODE_LOWERCASE_LETTER)
		ch = g_unichar_tolower (ch);

	parts = g_unicode_canonical_decomposition (ch, &n_parts);
	if (parts == NULL)
		return 0;

	base = parts[0];
	g_free (parts);

	return base;
}
/*
 * Tell whether `uni` is a Unicode mark: combining (spacing), enclosing or
 * non-spacing.
 */
static gboolean
str_unichar_iscombiningmark (gunichar uni)
{
    const GUnicodeType category = g_unichar_type (uni);

    if (category == G_UNICODE_COMBINING_MARK)
        return TRUE;
    if (category == G_UNICODE_ENCLOSING_MARK)
        return TRUE;
    if (category == G_UNICODE_NON_SPACING_MARK)
        return TRUE;

    return FALSE;
}
/*
 * Decide whether currentChar separates words.
 *
 * Letters, marks and numbers never delimit. Among connector punctuation
 * only '_' delimits. A small set of quote and abbreviation marks does not
 * delimit when it sits between two alphabetic characters (prevChar and
 * followChar); every other character is a delimiter.
 */
bool UT_isWordDelimiter(UT_UCSChar currentChar, UT_UCSChar followChar, UT_UCSChar prevChar)
{
	// fast track Ascii letters
	const bool asciiLower = ('a' <= currentChar && currentChar <= 'z');
	if (asciiLower)
		return false;
	const bool asciiUpper = ('A' <= currentChar && currentChar <= 'Z');
	if (asciiUpper)
		return false;

	switch (g_unichar_type(currentChar))
	{
		case G_UNICODE_MODIFIER_LETTER:
		case G_UNICODE_LOWERCASE_LETTER:
		case G_UNICODE_TITLECASE_LETTER:
		case G_UNICODE_UPPERCASE_LETTER:
		case G_UNICODE_OTHER_LETTER:
		case G_UNICODE_COMBINING_MARK:
		case G_UNICODE_ENCLOSING_MARK:
		case G_UNICODE_NON_SPACING_MARK:
		case G_UNICODE_DECIMAL_NUMBER:
		case G_UNICODE_LETTER_NUMBER:
		case G_UNICODE_OTHER_NUMBER:
			return false;

		case G_UNICODE_CONNECT_PUNCTUATION:
			// _ is a word separator, other connectors are not
			return (currentChar == '_');

		case G_UNICODE_OTHER_PUNCTUATION:
		case G_UNICODE_INITIAL_PUNCTUATION:
		case G_UNICODE_FINAL_PUNCTUATION:
			// handled below: some punctuation can be internal in a word
			break;

		default:
			return true;
	}

	switch (currentChar)
	{
		case 0x0022:        // QUOTATION MARK
		case 0x0027:        // APOSTROPHE
		case UCS_LDBLQUOTE: // smart quote, open double
		case UCS_RDBLQUOTE: // smart quote, close double
		case UCS_LQUOTE:    // smart quote, open single
		case UCS_RQUOTE:    // smart quote, close single
		case 0x055F:        // ARMENIAN ABBREVIATION MARK
		case 0x070A:        // SYRIAC CONTRACTION
		case 0x070F:        // SYRIAC ABBREVIATION MARK
		case 0x0970:        // DEVANAGARI ABBREVIATION SIGN
			// word-internal only when flanked by letters on both sides
			return !(UT_UCS4_isalpha(followChar) && UT_UCS4_isalpha(prevChar));

		default:
			return true;
	}
}
/*
 * Return 1 if `uc` may be part of a word, 0 otherwise.
 *
 * Apostrophes (ASCII and U+2019) always count. Letters, marks, numbers and
 * connector punctuation count as well (this mirrors how Enchant 1.3.0
 * defines word characters). Dash punctuation only counts when `n` is
 * greater than zero, i.e. hyphens are accepted within a word but not at
 * its start. Everything else is not a word character.
 */
static int
is_word_char (gunichar uc, size_t n)
{
	if (uc == g_utf8_get_char("'") || uc == g_utf8_get_char("’"))
		return 1;

	switch (g_unichar_type(uc)) {
	case G_UNICODE_MODIFIER_LETTER:
	case G_UNICODE_LOWERCASE_LETTER:
	case G_UNICODE_TITLECASE_LETTER:
	case G_UNICODE_UPPERCASE_LETTER:
	case G_UNICODE_OTHER_LETTER:
	case G_UNICODE_COMBINING_MARK: /* older name for G_UNICODE_SPACING_MARK; deprecated since glib 2.30 */
	case G_UNICODE_ENCLOSING_MARK:
	case G_UNICODE_NON_SPACING_MARK:
	case G_UNICODE_DECIMAL_NUMBER:
	case G_UNICODE_LETTER_NUMBER:
	case G_UNICODE_OTHER_NUMBER:
	case G_UNICODE_CONNECT_PUNCTUATION:
		return 1;

	case G_UNICODE_DASH_PUNCTUATION:
		/* hyphens only accepted within a word */
		return (n > 0) ? 1 : 0;

	default:
		/* controls, separators, symbols and all other punctuation */
		return 0;
	}
}
static gboolean mcview_is_spacing_mark (const WView * view, int c) { #ifdef HAVE_CHARSET if (view->utf8) return g_unichar_type (c) == SPACING_MARK; #else (void) view; (void) c; #endif /* HAVE_CHARSET */ return FALSE; }
static int enchant_is_all_caps(const char*const word, size_t len) { const char* it; int hasCap = 0; g_return_val_if_fail (word && *word, 0); for(it = word; it < word + len; it = g_utf8_next_char(it)) { GUnicodeType type = g_unichar_type(g_utf8_get_char(it)); switch(type) { case G_UNICODE_UPPERCASE_LETTER: hasCap = 1; break; case G_UNICODE_TITLECASE_LETTER: case G_UNICODE_LOWERCASE_LETTER: return 0; case G_UNICODE_CONTROL: case G_UNICODE_FORMAT: case G_UNICODE_UNASSIGNED: case G_UNICODE_PRIVATE_USE: case G_UNICODE_SURROGATE: case G_UNICODE_MODIFIER_LETTER: case G_UNICODE_OTHER_LETTER: case G_UNICODE_COMBINING_MARK: case G_UNICODE_ENCLOSING_MARK: case G_UNICODE_NON_SPACING_MARK: case G_UNICODE_DECIMAL_NUMBER: case G_UNICODE_LETTER_NUMBER: case G_UNICODE_OTHER_NUMBER: case G_UNICODE_CONNECT_PUNCTUATION: case G_UNICODE_DASH_PUNCTUATION: case G_UNICODE_CLOSE_PUNCTUATION: case G_UNICODE_FINAL_PUNCTUATION: case G_UNICODE_INITIAL_PUNCTUATION: case G_UNICODE_OTHER_PUNCTUATION: case G_UNICODE_OPEN_PUNCTUATION: case G_UNICODE_CURRENCY_SYMBOL: case G_UNICODE_MODIFIER_SYMBOL: case G_UNICODE_MATH_SYMBOL: case G_UNICODE_OTHER_SYMBOL: case G_UNICODE_LINE_SEPARATOR: case G_UNICODE_PARAGRAPH_SEPARATOR: case G_UNICODE_SPACE_SEPARATOR: default: break; } } return hasCap; }
/*
 * Count the number of line breaks at the very end of the document, walking
 * backwards segment by segment from the end iterator. A line break is
 * either '\n' or a character of category G_UNICODE_LINE_SEPARATOR.
 *
 * This assumes the buffer content is in UTF-8, which is currently
 * hardcoded in infinoted (and asserted below). Returns 0 when no end
 * iterator can be created.
 */
static guint
infinoted_plugin_linekeeper_count_lines(InfTextBuffer* buffer)
{
  InfTextBufferIter* iter;
  guint n_lines = 0;
  guint chars_left;
  gsize bytes_left;
  gchar* text;
  gchar* tail;

  g_assert(strcmp(inf_text_buffer_get_encoding(buffer), "UTF-8") == 0);

  iter = inf_text_buffer_create_end_iter(buffer);
  if(iter == NULL)
    return 0;

  do
  {
    chars_left = inf_text_buffer_iter_get_length(buffer, iter);
    bytes_left = inf_text_buffer_iter_get_bytes(buffer, iter);
    text = inf_text_buffer_iter_get_text(buffer, iter);

    /* Scan this segment from its last character towards its first. */
    for(tail = text + bytes_left; chars_left > 0; --chars_left)
    {
      gchar* start = g_utf8_prev_char(tail);
      gsize width = tail - start;
      gunichar ch;

      g_assert(bytes_left >= width);

      ch = g_utf8_get_char(start);
      if(ch != '\n' && g_unichar_type(ch) != G_UNICODE_LINE_SEPARATOR)
        break;

      ++n_lines;
      bytes_left -= width;
      tail = start;
    }

    g_free(text);

    /* Only move to the previous segment if this one was all line breaks. */
  } while(chars_left == 0 && inf_text_buffer_iter_prev(buffer, iter));

  inf_text_buffer_destroy_iter(buffer, iter);
  return n_lines;
}
bool IsFirstLetterCapitalOrTitleCase(const std::string& word) { gunichar ch; GUnicodeType type; ch = g_utf8_get_char(word.c_str()); type = g_unichar_type(ch); if(type == G_UNICODE_UPPERCASE_LETTER || type == G_UNICODE_TITLECASE_LETTER) return true; return false; }
/**
 * tracker_text_normalize:
 * @text: the text to normalize
 * @max_words: the maximum words of @text to normalize
 * @n_words: the number of words actually normalized
 *
 * This function iterates through @text decoding it with
 * g_utf8_get_char_validated(). For each character found, the
 * %GUnicodeType is checked to make sure it is one of the following
 * values:
 * <itemizedlist>
 *  <listitem><para>%G_UNICODE_LOWERCASE_LETTER</para></listitem>
 *  <listitem><para>%G_UNICODE_MODIFIER_LETTER</para></listitem>
 *  <listitem><para>%G_UNICODE_OTHER_LETTER</para></listitem>
 *  <listitem><para>%G_UNICODE_TITLECASE_LETTER</para></listitem>
 *  <listitem><para>%G_UNICODE_UPPERCASE_LETTER</para></listitem>
 * </itemizedlist>
 *
 * All other symbols, punctuation, marks, numbers and separators are
 * stripped, and so are invalid UTF-8 sequences; any of them ends the
 * current word. A regular space (i.e. " ") is used to separate the
 * words in the returned string. Processing stops at the end of @text
 * or as soon as @max_words words have been normalized.
 *
 * The @n_words can be %NULL. If specified, it will be populated with
 * the number of words that were normalized in the result.
 *
 * Returns: a newly-allocated string holding the result which should
 * be freed with g_free() when finished with, or %NULL if @text is
 * %NULL.
 *
 * Since: 0.8
 *
 * Deprecated: 0.10: Use tracker_text_validate_utf8() instead.
 **/
gchar *
tracker_text_normalize (const gchar *text,
                        guint        max_words,
                        guint       *n_words)
{
	GString *string;
	gboolean in_break = TRUE;
	gunichar ch;
	guint words = 0;

	if (n_words) {
		*n_words = 0;
	}

	g_return_val_if_fail (text != NULL, NULL);

	string = g_string_new (NULL);

	/* gunichar is unsigned, so the decoder's error values (gunichar) -1
	 * and (gunichar) -2 never compare below zero: only the terminating
	 * NUL ends the loop. Invalid sequences are handled explicitly below.
	 */
	while ((ch = g_utf8_get_char_validated (text, -1)) != 0) {
		gboolean is_letter = FALSE;

		if (ch != (gunichar) -1 && ch != (gunichar) -2) {
			switch (g_unichar_type (ch)) {
			case G_UNICODE_LOWERCASE_LETTER:
			case G_UNICODE_MODIFIER_LETTER:
			case G_UNICODE_OTHER_LETTER:
			case G_UNICODE_TITLECASE_LETTER:
			case G_UNICODE_UPPERCASE_LETTER:
				is_letter = TRUE;
				break;
			default:
				break;
			}
		}

		if (is_letter) {
			if (in_break && words >= max_words) {
				/* Starting another word would exceed the
				 * requested limit.
				 */
				break;
			}

			/* Append regular chars */
			g_string_append_unichar (string, ch);
			in_break = FALSE;
		} else if (!in_break) {
			/* Non-regular char found, treat as word break */
			g_string_append_c (string, ' ');
			in_break = TRUE;
			words++;
		}

		text = g_utf8_find_next_char (text, NULL);
	}

	if (!in_break) {
		/* Count the last word */
		words++;
	}

	if (n_words) {
		*n_words = words;
	}

	return g_string_free (string, FALSE);
}
/* actually is_non_spacing_mark_or_enclosing_mark */ static gboolean mcview_is_non_spacing_mark (const WView * view, int c) { #ifdef HAVE_CHARSET if (view->utf8) { GUnicodeType type; type = g_unichar_type (c); return type == G_UNICODE_NON_SPACING_MARK || type == G_UNICODE_ENCLOSING_MARK; } #else (void) view; (void) c; #endif /* HAVE_CHARSET */ return FALSE; }
/*
 * Return TRUE if `string` starts with the first `prefix_len` bytes of
 * `prefix` and the match ends on a complete character: the prefix must be
 * followed either by the end of the string or by something that is not a
 * Unicode mark. This rejects e.g. a trailing 'a' in the prefix matching
 * the base letter of a two-code-point a-with-hat in the string.
 */
static gboolean
exact_prefix_cmp (const gchar *string,
                  const gchar *prefix,
                  guint        prefix_len)
{
	const gchar *rest;

	if (strncmp (string, prefix, prefix_len) != 0)
		return FALSE;

	rest = string + prefix_len;
	if (*rest == '\0')
		return TRUE;

	switch (g_unichar_type (g_utf8_get_char (rest))) {
	case G_UNICODE_COMBINING_MARK:
	case G_UNICODE_ENCLOSING_MARK:
	case G_UNICODE_NON_SPACING_MARK:
		/* The next code point modifies the last prefix character. */
		return FALSE;
	default:
		return TRUE;
	}
}
bool IsWordAllCaps(const std::string& word) { const char* it, *itEnd; bool hasCap = false; for(it = word.c_str(), itEnd = it+word.length(); it < itEnd; it = g_utf8_next_char(it)) { GUnicodeType type = g_unichar_type(g_utf8_get_char(it)); switch(type) { case G_UNICODE_UPPERCASE_LETTER: hasCap = true; break; case G_UNICODE_TITLECASE_LETTER: case G_UNICODE_LOWERCASE_LETTER: return false; } } return hasCap; }
/*
 * Read up to four hexadecimal digits from the scanner and assemble them,
 * most significant nibble first, into a code point. Reading stops at the
 * first character that is not a hex digit (that character has already
 * been taken from the scanner). The result is asserted to be either a
 * valid Unicode character or a surrogate.
 */
static gunichar
json_scanner_get_unichar (JsonScanner *scanner,
                          guint       *line_p,
                          guint       *position_p)
{
  gunichar code_point = 0;
  gint shift;

  /* Shifts 12, 8, 4, 0 place the four digits from high to low nibble. */
  for (shift = 12; shift >= 0; shift -= 4)
    {
      gchar digit = json_scanner_get_char (scanner, line_p, position_p);

      if (!is_hex_digit (digit))
        break;

      code_point += ((gunichar) to_hex_digit (digit) << shift);
    }

  g_assert (g_unichar_validate (code_point) || g_unichar_type (code_point) == G_UNICODE_SURROGATE);

  return code_point;
}
/**
 * Apply the string transformations selected by aTransformFlags to aInput
 * and store the result in _retval.
 *
 * The input is converted to UTF-8 and the enabled stages run in this fixed
 * order, each one feeding the next:
 *   1. TRANSFORM_LOWERCASE
 *   2. TRANSFORM_UPPERCASE
 *   3. TRANSFORM_IGNORE_NONSPACE   - drop non-spacing/combining/enclosing marks
 *   4. TRANSFORM_IGNORE_SYMBOLS    - drop currency/modifier/math/other symbols
 *   5. TRANSFORM_IGNORE_NONALPHANUM[_IGNORE_SPACE] - keep letters and numbers
 *      only (plus ' ' unless the _IGNORE_SPACE variant is set)
 *
 * TRANSFORM_IGNORE_LEADING restricts stages 3-5 to the leading characters:
 * once the first character has been kept, the rest is copied unfiltered.
 * TRANSFORM_IGNORE_KEEPNUMBERSYMBOLS makes stages 4 and 5 copy whatever
 * SB_ExtractLeadingNumber() reports as a leading number verbatim.
 *
 * aCharset is not referenced by this implementation.
 *
 * Returns NS_OK, or NS_ERROR_OUT_OF_MEMORY when one of the GLib conversion
 * calls returns NULL.
 */
NS_IMETHODIMP
sbStringTransformImpl::NormalizeString(const nsAString & aCharset,
                                       PRUint32 aTransformFlags,
                                       const nsAString & aInput,
                                       nsAString & _retval)
{
  // All GLib processing below works on a UTF-8 copy of the input.
  nsCString str;
  CopyUTF16toUTF8(aInput, str);

  if(aTransformFlags & sbIStringTransform::TRANSFORM_LOWERCASE) {
    gchar* lowercaseStr = g_utf8_strdown(str.BeginReading(), str.Length());
    NS_ENSURE_TRUE(lowercaseStr, NS_ERROR_OUT_OF_MEMORY);
    str.Assign(lowercaseStr);
    g_free(lowercaseStr);
  }

  // If both case flags are set, this stage runs second and therefore wins.
  if(aTransformFlags & sbIStringTransform::TRANSFORM_UPPERCASE) {
    gchar* uppercaseStr = g_utf8_strup(str.BeginReading(), str.Length());
    NS_ENSURE_TRUE(uppercaseStr, NS_ERROR_OUT_OF_MEMORY);
    str.Assign(uppercaseStr);
    g_free(uppercaseStr);
  }

  if(aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONSPACE) {
    nsString workingStr;
    PRBool leadingOnly = aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_LEADING;
    // Becomes PR_TRUE after the first kept character when leadingOnly is
    // set; from then on every character is copied without testing.
    PRBool bypassTest = PR_FALSE;
    gchar* nonspaceStr = g_utf8_normalize(str.BeginReading(), str.Length(), G_NORMALIZE_ALL);
    NS_ENSURE_TRUE(nonspaceStr, NS_ERROR_OUT_OF_MEMORY);
    glong strLen = g_utf8_strlen(nonspaceStr, -1);
    // currentChar is a character index, not a byte offset.
    // NOTE(review): g_utf8_offset_to_pointer() is called once per character;
    // presumably it walks from the start of the string each time - confirm
    // whether this matters for long inputs.
    for(glong currentChar = 0; currentChar < strLen; ++currentChar) {
      gchar* offset = g_utf8_offset_to_pointer(nonspaceStr, currentChar);
      gunichar unichar = g_utf8_get_char(offset);
      GUnicodeType unicharType = g_unichar_type(unichar);
      if(bypassTest ||
         (unicharType != G_UNICODE_NON_SPACING_MARK &&
          unicharType != G_UNICODE_COMBINING_MARK &&
          unicharType != G_UNICODE_ENCLOSING_MARK)) {
        // NOTE(review): a gunichar is appended to a UTF-16 nsString here;
        // verify how code points above U+FFFF are handled.
        workingStr += unichar;
        if(leadingOnly)
          bypassTest = PR_TRUE;
      }
    }
    g_free(nonspaceStr);
    CopyUTF16toUTF8(workingStr, str);
  }

  if(aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_SYMBOLS) {
    nsString workingStr;
    PRBool leadingOnly = aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_LEADING;
    PRBool bypassTest = PR_FALSE;
    gchar* nosymbolsStr = g_utf8_normalize(str.BeginReading(), str.Length(), G_NORMALIZE_ALL);
    NS_ENSURE_TRUE(nosymbolsStr, NS_ERROR_OUT_OF_MEMORY);
    glong strLen = g_utf8_strlen(nosymbolsStr, -1);
    for(glong currentChar = 0; currentChar < strLen; ++currentChar) {
      gchar* offset = g_utf8_offset_to_pointer(nosymbolsStr, currentChar);
      gunichar unichar = g_utf8_get_char(offset);
      GUnicodeType unicharType = g_unichar_type(unichar);
      if (aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_KEEPNUMBERSYMBOLS) {
        // Ask the helper whether a number starts at this position.
        // NOTE(review): numberLength is used below as a count of
        // characters - confirm that is the unit SB_ExtractLeadingNumber
        // reports.
        PRInt32 numberLength;
        SB_ExtractLeadingNumber((const gchar *)offset, NULL, NULL, &numberLength);
        if (numberLength > 0) {
          // Copy the whole number unfiltered, symbols included.
          for (glong copychar=0;copychar < numberLength;copychar++) {
            gchar* copyoffset = g_utf8_offset_to_pointer(nosymbolsStr, currentChar+copychar);
            gunichar unichar = g_utf8_get_char(copyoffset);
            workingStr += unichar;
          }
          // -1 because the loop header increments currentChar once more.
          currentChar += numberLength-1;
          if(leadingOnly)
            bypassTest = PR_TRUE;
          continue;
        }
      }
      if(bypassTest ||
         (unicharType != G_UNICODE_CURRENCY_SYMBOL &&
          unicharType != G_UNICODE_MODIFIER_SYMBOL &&
          unicharType != G_UNICODE_MATH_SYMBOL &&
          unicharType != G_UNICODE_OTHER_SYMBOL)) {
        workingStr += unichar;
        if(leadingOnly)
          bypassTest = PR_TRUE;
      }
    }
    g_free(nosymbolsStr);
    CopyUTF16toUTF8(workingStr, str);
  }

  if((aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONALPHANUM) ||
     (aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONALPHANUM_IGNORE_SPACE)) {
    nsString workingStr;
    PRBool leadingOnly = aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_LEADING;
    PRBool bypassTest = PR_FALSE;
    gchar* nosymbolsStr = g_utf8_normalize(str.BeginReading(), str.Length(), G_NORMALIZE_ALL);
    NS_ENSURE_TRUE(nosymbolsStr, NS_ERROR_OUT_OF_MEMORY);
    glong strLen = g_utf8_strlen(nosymbolsStr, -1);
    for(glong currentChar = 0; currentChar < strLen; ++currentChar) {
      gchar* offset = g_utf8_offset_to_pointer(nosymbolsStr, currentChar);
      gunichar unichar = g_utf8_get_char(offset);
      GUnicodeType unicharType = g_unichar_type(unichar);
      if (aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_KEEPNUMBERSYMBOLS) {
        // Same leading-number passthrough as in the symbol stage.
        PRInt32 numberLength;
        SB_ExtractLeadingNumber((const gchar *)offset, NULL, NULL, &numberLength);
        if (numberLength > 0) {
          for (glong copychar=0;copychar < numberLength;copychar++) {
            gchar* copyoffset = g_utf8_offset_to_pointer(nosymbolsStr, currentChar+copychar);
            gunichar unichar = g_utf8_get_char(copyoffset);
            workingStr += unichar;
          }
          currentChar += numberLength-1;
          if(leadingOnly)
            bypassTest = PR_TRUE;
          continue;
        }
      }
      // Keep letters and numbers; keep a plain space too unless the
      // _IGNORE_SPACE variant of the flag was requested.
      if(bypassTest ||
         (unicharType == G_UNICODE_LOWERCASE_LETTER ||
          unicharType == G_UNICODE_MODIFIER_LETTER ||
          unicharType == G_UNICODE_OTHER_LETTER ||
          unicharType == G_UNICODE_TITLECASE_LETTER ||
          unicharType == G_UNICODE_UPPERCASE_LETTER ||
          unicharType == G_UNICODE_DECIMAL_NUMBER ||
          unicharType == G_UNICODE_LETTER_NUMBER ||
          unicharType == G_UNICODE_OTHER_NUMBER) ||
         (!(aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONALPHANUM_IGNORE_SPACE) &&
          unichar == ' ')) {
        workingStr += unichar;
        if(leadingOnly)
          bypassTest = PR_TRUE;
      }
    }
    g_free(nosymbolsStr);
    CopyUTF16toUTF8(workingStr, str);
  }

  CopyUTF8toUTF16(str, _retval);
  return NS_OK;
}
/*
 * Shape `length` bytes of UTF-8 `text` into `glyphs` using `font`.
 *
 * When built with HAVE_USP10_H, Uniscribe is tried first for text that is
 * not "simple"; if it succeeds nothing else is done. Otherwise one glyph
 * is produced per character:
 *   - on odd (right-to-left) embedding levels characters are replaced by
 *     their mirrored counterpart when one exists;
 *   - a non-break space is shaped as a regular space;
 *   - zero-width characters get PANGO_GLYPH_EMPTY;
 *   - characters the font lacks get the "unknown glyph" for that character;
 *   - a non-spacing mark is merged into the cluster of the glyph before it.
 * For odd levels the glyph order is finally reversed, keeping the glyphs
 * inside each cluster in their original order.
 *
 * `engine` is not referenced by this function.
 */
static void
basic_engine_shape (PangoEngineShape *engine,
		    PangoFont        *font,
		    const char       *text,
		    gint              length,
		    PangoAnalysis    *analysis,
		    PangoGlyphString *glyphs)
{
  int n_chars;
  int i;
  const char *p;

  g_return_if_fail (font != NULL);
  g_return_if_fail (text != NULL);
  g_return_if_fail (length >= 0);
  g_return_if_fail (analysis != NULL);

#ifdef HAVE_USP10_H
  /* Prefer Uniscribe for anything that is not simple text; fall through to
   * the basic shaper if it is unavailable or reports failure.
   */
  if (have_uniscribe &&
      !text_is_simple (text, length) &&
      uniscribe_shape (font, text, length, analysis, glyphs))
    return;
#endif

  /* One glyph slot per character. */
  n_chars = g_utf8_strlen (text, length);
  pango_glyph_string_set_size (glyphs, n_chars);

  p = text;
  for (i = 0; i < n_chars; i++)
    {
      gunichar wc;
      gunichar mirrored_ch;
      PangoGlyph index;

      wc = g_utf8_get_char (p);

      /* Odd embedding level: use the mirrored character if there is one. */
      if (analysis->level % 2)
	if (pango_get_mirror_char (wc, &mirrored_ch))
	  wc = mirrored_ch;

      if (wc == 0xa0)	/* non-break-space */
	wc = 0x20;

      if (pango_is_zero_width (wc))
	{
	  set_glyph (font, glyphs, i, p - text, PANGO_GLYPH_EMPTY);
	}
      else
	{
	  index = find_char (font, wc);
	  if (index)
	    {
	      set_glyph (font, glyphs, i, p - text, index);

	      if (g_unichar_type (wc) == G_UNICODE_NON_SPACING_MARK)
		{
		  if (i > 0)
		    {
		      PangoRectangle logical_rect, ink_rect;

		      /* The mark takes over the larger of the two widths
		       * and the previous glyph collapses to zero width; both
		       * then share the previous glyph's cluster.
		       */
		      glyphs->glyphs[i].geometry.width = MAX (glyphs->glyphs[i-1].geometry.width,
							      glyphs->glyphs[i].geometry.width);
		      glyphs->glyphs[i-1].geometry.width = 0;
		      glyphs->log_clusters[i] = glyphs->log_clusters[i-1];

		      /* Some heuristics to try to guess how overstrike glyphs are
		       * done and compensate
		       */
		      /* FIXME: (alex) Is this double call to get_glyph_extents really necessary? */
		      pango_font_get_glyph_extents (font, glyphs->glyphs[i].glyph, &ink_rect, &logical_rect);
		      /* Zero advance and ink starting at the origin: centre
		       * the mark within the combined width.
		       */
		      if (logical_rect.width == 0 && ink_rect.x == 0)
			glyphs->glyphs[i].geometry.x_offset = (glyphs->glyphs[i].geometry.width - ink_rect.width) / 2;
		    }
		}
	    }
	  else
	    set_glyph (font, glyphs, i, p - text, PANGO_GET_UNKNOWN_GLYPH (wc));
	}

      p = g_utf8_next_char (p);
    }

  /* Simple bidi support... may have separate modules later */

  if (analysis->level % 2)
    {
      int start, end;

      /* Swap all glyphs */
      swap_range (glyphs, 0, n_chars);

      /* Now reorder glyphs within each cluster back to LTR */
      for (start = 0; start < n_chars;)
	{
	  /* [start, end) is the run of glyphs sharing one log_cluster. */
	  end = start;
	  while (end < n_chars &&
		 glyphs->log_clusters[end] == glyphs->log_clusters[start])
	    end++;

	  swap_range (glyphs, start, end);
	  start = end;
	}
    }
}
/**
 * gsdl_tokenizer_next:
 * @self: A valid %GSDLTokenizer.
 * @result: (out callee-allocates): A %GSDLToken to initialize and fill in.
 * @err: (out) (allow-none): Location to store any error, may be %NULL.
 *
 * Fetches the next token from the input. Depending on the source of input, may set an error in one
 * of the %GSDL_SYNTAX_ERROR, %G_IO_CHANNEL_ERROR, or %G_CONVERT_ERROR domains.
 *
 * Spaces, tabs, comments (`//`, `--`, `#` to end of line and C-style block comments) and
 * backslash-newline line continuations are skipped. A lone CR or a CR+LF pair is reported as a
 * single '\n' token.
 *
 * Returns: Whether a token could be successfully read.
 */
bool gsdl_tokenizer_next(GSDLTokenizer *self, GSDLToken **result, GError **err) {
	gunichar c, nc;
	int line;
	int col;

	retry:
	/* Remember where the token starts; reading advances self->line/col. */
	line = self->line;
	col = self->col;
	if (!_read(self, &c, err)) return false;

	if (G_UNLIKELY(c == EOF)) {
		*result = _maketoken(T_EOF, line, col);
		return true;
	} else if (c == '\r') {
		/* CR or CR+LF both become a single newline token. */
		if (_peek(self, &c, err) && c == '\n') _consume(self);

		*result = _maketoken('\n', line, col);
		FAIL_IF_ERR();
		return true;
	} else if ((c == '/' && _peek(self, &nc, err) && nc == '/') || (c == '-' && _peek(self, &nc, err) && nc == '-') || c == '#') {
		/* Line comment: drop the second marker character (if any) and
		 * everything up to, but not including, the newline or EOF. */
		if (c != '#') _consume(self);
		while (_peek(self, &c, err) && !(c == '\n' || c == EOF)) _consume(self);

		goto retry;
	} else if (c == '/' && _peek(self, &nc, err) && nc == '*') {
		/* Consume the '*' of the opener so that it cannot double as the
		 * '*' of a terminator: "/" "*" "/" alone is not a complete comment. */
		_consume(self);

		while (_read(self, &c, err)) {
			if (c == EOF) {
				_set_error(err,
				           self,
				           GSDL_SYNTAX_ERROR_UNEXPECTED_CHAR,
				           "Unterminated comment"
				);

				return false;
			} else if (c == '*' && _peek(self, &c, err) && c == '/') {
				_consume(self);
				break;
			}
		}

		goto retry;
	/* NOTE(review): strchr() also matches the string terminator, so c == 0 is
	 * accepted here and yields a token of type 0 - confirm that is intended. */
	} else if (c < 256 && strchr("-+:;./{}=\n", (char) c)) {
		/* Single-character tokens use the character itself as type. */
		*result = _maketoken(c, line, col);
		return true;
	} else if (c < 256 && isdigit((unsigned char) c)) {
		/* <ctype.h> functions require a value representable as unsigned char. */
		*result = _maketoken(T_NUMBER, line, col);
		return _tokenize_number(self, *result, c, err);
	} else if (g_unichar_isalpha(c) || g_unichar_type(c) == G_UNICODE_CONNECT_PUNCTUATION || g_unichar_type(c) == G_UNICODE_CURRENCY_SYMBOL) {
		*result = _maketoken(T_IDENTIFIER, line, col);
		return _tokenize_identifier(self, *result, c, err);
	} else if (c == '[') {
		*result = _maketoken(T_BINARY, line, col);
		if (!_tokenize_binary(self, *result, err)) return false;

		REQUIRE(_read(self, &c, err));
		if (c == ']') {
			return true;
		} else {
			_set_error(err,
			           self,
			           GSDL_SYNTAX_ERROR_MISSING_DELIMITER,
			           "Missing ']'"
			);
			return false;
		}
	} else if (c == '"') {
		*result = _maketoken(T_STRING, line, col);
		if (!_tokenize_string(self, *result, err)) return false;

		REQUIRE(_read(self, &c, err));
		if (c == '"') {
			return true;
		} else {
			_set_error(err,
			           self,
			           GSDL_SYNTAX_ERROR_MISSING_DELIMITER,
			           "Missing '\"'"
			);
			return false;
		}
	} else if (c == '`') {
		*result = _maketoken(T_STRING, line, col);
		if (!_tokenize_backquote_string(self, *result, err)) return false;

		REQUIRE(_read(self, &c, err));
		if (c == '`') {
			return true;
		} else {
			_set_error(err,
			           self,
			           GSDL_SYNTAX_ERROR_MISSING_DELIMITER,
			           "Missing '`'"
			);
			return false;
		}
	} else if (c == '\'') {
		*result = _maketoken(T_CHAR, line, col);
		/* g_unichar_to_utf8() may write up to 6 bytes; the 7th keeps the
		 * value NUL-terminated. */
		(*result)->val = g_malloc0(7);

		if (!_read(self, &c, err)) return false;
		if (c == '\\') {
			if (!_read(self, &c, err)) return false;

			switch (c) {
				case 'n': c = '\n'; break;
				case 'r': c = '\r'; break;
				case 't': c = '\t'; break;
				case '"': c = '"'; break;
				case '\'': c = '\''; break;
				case '\\': c = '\\'; break;
				default: break; /* unknown escape: keep the character as is */
			}
		}

		if (c == EOF) {
			/* Input ended inside the literal; EOF is not a character. */
			_set_error(err,
			           self,
			           GSDL_SYNTAX_ERROR_MISSING_DELIMITER,
			           "Missing \"'\""
			);
			return false;
		}

		g_unichar_to_utf8(c, (*result)->val);

		REQUIRE(_read(self, &c, err));
		if (c == '\'') {
			return true;
		} else {
			_set_error(err,
			           self,
			           GSDL_SYNTAX_ERROR_MISSING_DELIMITER,
			           "Missing \"'\""
			);
			return false;
		}
	} else if (c == '\\' && _peek(self, &nc, err) && (nc == '\r' || nc == '\n')) {
		/* Line continuation: swallow the line break after the backslash.
		 * For CR+LF the LF has to go as well, otherwise it would surface
		 * as a newline token. */
		_consume(self);
		if (nc == '\r' && _peek(self, &c, err) && c == '\n') _consume(self);

		goto retry;
	} else if (c == ' ' || c == '\t') {
		/* Insignificant whitespace */
		goto retry;
	} else {
		/* g_ucs4_to_utf8() returns a newly allocated string (or NULL for an
		 * invalid code point); release it once the message is built. */
		gchar *printable = g_ucs4_to_utf8(&c, 1, NULL, NULL, NULL);
		gchar *message = g_strdup_printf("Invalid character '%s'(%d)", printable ? printable : "", c);

		g_free(printable);

		/* NOTE(review): ownership of `message` is left to _set_error(), as
		 * before - confirm whether it copies or adopts the string. */
		_set_error(err,
		           self,
		           GSDL_SYNTAX_ERROR_UNEXPECTED_CHAR,
		           message
		);
		return false;
	}
}
/*
 * Read the remainder of an identifier whose first character `c` has
 * already been consumed, storing the NUL-terminated UTF-8 text in
 * result->val.
 *
 * Identifier characters are '-', '.', alphabetic characters, digits,
 * currency symbols, connector punctuation, letter numbers and spacing or
 * non-spacing marks. The words "true", "on", "false" and "off" retype the
 * token as T_BOOLEAN, "null" as T_NULL; anything else keeps the type the
 * caller gave it.
 */
static bool _tokenize_identifier(GSDLTokenizer *self, GSDLToken *result, gunichar c, GError **err) {
	int length = 7;
	char *output = result->val = g_malloc(length);
	int i = g_unichar_to_utf8(c, output);
	bool is_boolean;

	for (;;) {
		bool accepted;

		if (!_peek(self, &c, err)) break;

		accepted = c == '-' || c == '.' || g_unichar_isalpha(c) || g_unichar_isdigit(c);
		if (!accepted) {
			/* Not a plain identifier character: decide by category. */
			GUnicodeType type = g_unichar_type(c);

			accepted = type == G_UNICODE_CURRENCY_SYMBOL
			        || type == G_UNICODE_CONNECT_PUNCTUATION
			        || type == G_UNICODE_LETTER_NUMBER
			        || type == G_UNICODE_SPACING_MARK
			        || type == G_UNICODE_NON_SPACING_MARK;
		}
		if (!accepted) break;

		GROW_IF_NEEDED(output = result->val, i + 5, length);

		_consume(self);
		i += g_unichar_to_utf8(c, output + i);
	}
	FAIL_IF_ERR();
	output[i] = '\0';

	/* Keywords get their own token types. */
	is_boolean = strcmp(output, "true") == 0
	          || strcmp(output, "on") == 0
	          || strcmp(output, "false") == 0
	          || strcmp(output, "off") == 0;

	if (is_boolean) {
		result->type = T_BOOLEAN;
	} else if (strcmp(output, "null") == 0) {
		result->type = T_NULL;
	}

	return true;
}
/**
 * Get one base character, along with its combining or spacing mark characters.
 *
 * (A spacing mark is a character that extends the base character's width 1 into a combined
 * character of width 2, yet these two character cells should not be separated. E.g. Devanagari
 * <U+0939><U+094B>.)
 *
 * This method exists mainly for two reasons. One is to be able to tell if we fit on the current
 * line or need to wrap to the next one. The other is that both slang and ncurses seem to require
 * that the character and its combining marks are printed in a single call (or is it just a
 * limitation of mc's wrapper to them?).
 *
 * For convenience, this method takes care of converting CR or CR+LF into LF.
 * TODO this should probably happen later, when displaying the file?
 *
 * Normally: stores cs and color, updates state, returns >= 1 (entries in cs).
 * At EOF: state is unchanged, cs and color are undefined, returns 0.
 *
 * Lookahead is always done on a copy of the state; the copy is committed to @state only when
 * the character that was read is accepted into the sequence, so a rejected character stays
 * available for the next call.
 *
 * @param view ...
 * @param state the parser-formatter state machine's state, updated
 * @param cs store the characters here
 * @param clen the room available in cs (that is, at most clen-1 combining marks are allowed), must
 *   be at least 2
 * @param color if non-NULL, store the color here, taken from the first codepoint's color
 * @return the number of entries placed in cs, or 0 on EOF
 */
static int
mcview_next_combining_char_sequence (WView * view, mcview_state_machine_t * state, int *cs,
                                     int clen, int *color)
{
    /* Index of the next free slot in cs; slot 0 receives the base character. */
    int i = 1;

    if (!mcview_get_next_maybe_nroff_char (view, state, cs, color))
        return 0;

    /* Process \r and \r\n newlines. */
    if (cs[0] == '\r')
    {
        int cnext;

        /* Peek one character ahead; only swallow it if it is the LF of a CR+LF pair. */
        mcview_state_machine_t state_after_crlf = *state;
        if (mcview_get_next_maybe_nroff_char (view, &state_after_crlf, &cnext, NULL)
            && cnext == '\n')
            *state = state_after_crlf;
        cs[0] = '\n';
        return 1;
    }

    /* We don't want combining over non-printable characters. This includes '\n' and '\t' too. */
    if (!mcview_isprint (view, cs[0]))
        return 1;

    if (mcview_ismark (view, cs[0]))
    {
        if (!state->print_lonely_combining)
        {
            /* First character is combining. Either just return it, ... */
            return 1;
        }
        else
        {
            /* or place this (and subsequent combining ones) over a dotted circle. */
            /* This writes cs[1], which is why clen must be at least 2. */
            cs[1] = cs[0];
            cs[0] = BASE_CHARACTER_FOR_LONELY_COMBINING;
            i = 2;
        }
    }

    if (mcview_wcwidth (view, cs[0]) == 2)
    {
        /* Don't allow combining or spacing mark for wide characters, is this okay? */
        return 1;
    }

    /* Look for more combining chars. Either at most clen-1 zero-width combining chars,
     * or at most 1 spacing mark. Is this logic correct? */
    for (; i < clen; i++)
    {
        mcview_state_machine_t state_after_combining;

        /* Read ahead on a scratch copy so that a rejected character is not consumed. */
        state_after_combining = *state;
        if (!mcview_get_next_maybe_nroff_char (view, &state_after_combining, &cs[i], NULL))
            return i;
        if (!mcview_ismark (view, cs[i]) || !mcview_isprint (view, cs[i]))
            return i;
        if (g_unichar_type (cs[i]) == SPACING_MARK)
        {
            /* Only allow as the first combining char. Stop processing in either case. */
            if (i == 1)
            {
                *state = state_after_combining;
                i++;
            }
            return i;
        }
        /* Accepted: commit the lookahead. */
        *state = state_after_combining;
    }
    return i;
}
/*
 * json_scanner_get_token_ll:
 * @scanner:    scanner to read characters from
 * @token_p:    receives the type of the token that was read
 * @value_p:    receives the value that goes with the token
 * @line_p:     line counter, passed on to json_scanner_get_char()
 * @position_p: position counter, passed on to json_scanner_get_char() and
 *              additionally incremented here on EOF and on some errors
 *
 * Low-level tokenizer: consumes characters from @scanner until exactly one
 * token has been recognised and stores it in @token_p / @value_p.
 *
 * Which constructs are recognised is controlled by scanner->config:
 * multi-line and single-line comments, single- and double-quoted strings
 * (double-quoted ones with backslash escapes, \uXXXX sequences and octal
 * escapes), numbers (binary, octal, decimal, hexadecimal, float),
 * identifiers, symbols looked up from identifiers, and single characters.
 *
 * If the input is exhausted, or the scanner's current token is already
 * G_TOKEN_EOF, only G_TOKEN_EOF is stored in @token_p and @value_p keeps
 * just the zeroed v_int64 written at the top of the function.
 *
 * A comment or string that is still open when the input ends yields
 * G_TOKEN_ERROR with G_ERR_UNEXP_EOF_IN_COMMENT or G_ERR_UNEXP_EOF_IN_STRING.
 *
 * NOTE(review): the \u escape handling uses g_assert() on values derived
 * from the input; see the notes at those lines.
 */
static void
json_scanner_get_token_ll (JsonScanner *scanner,
                           GTokenType  *token_p,
                           GTokenValue *value_p,
                           guint       *line_p,
                           guint       *position_p)
{
  JsonScannerConfig *config;
  GTokenType token;
  gboolean in_comment_multi;
  gboolean in_comment_single;
  gboolean in_string_sq;
  gboolean in_string_dq;
  GString *gstring;
  GTokenValue value;
  guchar ch;

  config = scanner->config;
  (*value_p).v_int64 = 0;

  /* Nothing left to read (no buffered text and no input fd), or EOF was
   * already reported: keep reporting EOF. */
  if ((scanner->text >= scanner->text_end && scanner->input_fd < 0) ||
      scanner->token == G_TOKEN_EOF)
    {
      *token_p = G_TOKEN_EOF;
      return;
    }

  /* The in_* flags stay TRUE when the matching terminator is never seen;
   * they are checked after the loop to report an unexpected EOF. */
  in_comment_multi = FALSE;
  in_comment_single = FALSE;
  in_string_sq = FALSE;
  in_string_dq = FALSE;
  gstring = NULL;

  /* Every path through the body sets ch to 0 once a token is complete (see
   * the assertion at the bottom), so this loop body runs once per call. */
  do /* while (ch != 0) */
    {
      gboolean dotted_float = FALSE;

      ch = json_scanner_get_char (scanner, line_p, position_p);

      value.v_int64 = 0;
      token = G_TOKEN_NONE;

      /* this is *evil*, but needed ;(
       * we first check for identifier first character, because it
       * might interfere with other key chars like slashes or numbers
       */
      if (config->scan_identifier &&
          ch && strchr (config->cset_identifier_first, ch))
        goto identifier_precedence;

      switch (ch)
        {
        case 0:
          token = G_TOKEN_EOF;
          (*position_p)++;
          /* ch = 0; */
          break;

        /* Multi-line comment: collect everything up to the closing star-slash. */
        case '/':
          if (!config->scan_comment_multi ||
              json_scanner_peek_next_char (scanner) != '*')
            goto default_case;
          json_scanner_get_char (scanner, line_p, position_p);
          token = G_TOKEN_COMMENT_MULTI;
          in_comment_multi = TRUE;
          gstring = g_string_new (NULL);
          while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
            {
              if (ch == '*' && json_scanner_peek_next_char (scanner) == '/')
                {
                  json_scanner_get_char (scanner, line_p, position_p);
                  in_comment_multi = FALSE;
                  break;
                }
              else
                gstring = g_string_append_c (gstring, ch);
            }
          ch = 0;
          break;

        /* Single-quoted string: copied verbatim, no escape processing. */
        case '\'':
          if (!config->scan_string_sq)
            goto default_case;
          token = G_TOKEN_STRING;
          in_string_sq = TRUE;
          gstring = g_string_new (NULL);
          while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
            {
              if (ch == '\'')
                {
                  in_string_sq = FALSE;
                  break;
                }
              else
                gstring = g_string_append_c (gstring, ch);
            }
          ch = 0;
          break;

        /* Double-quoted string with backslash escapes. */
        case '"':
          if (!config->scan_string_dq)
            goto default_case;
          token = G_TOKEN_STRING;
          in_string_dq = TRUE;
          gstring = g_string_new (NULL);
          while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
            {
              if (ch == '"')
                {
                  in_string_dq = FALSE;
                  break;
                }
              else
                {
                  if (ch == '\\')
                    {
                      ch = json_scanner_get_char (scanner, line_p, position_p);
                      switch (ch)
                        {
                          guint i;
                          guint fchar;

                        case 0:
                          break;
                        case '\\':
                          gstring = g_string_append_c (gstring, '\\');
                          break;
                        case 'n':
                          gstring = g_string_append_c (gstring, '\n');
                          break;
                        case 't':
                          gstring = g_string_append_c (gstring, '\t');
                          break;
                        case 'r':
                          gstring = g_string_append_c (gstring, '\r');
                          break;
                        case 'b':
                          gstring = g_string_append_c (gstring, '\b');
                          break;
                        case 'f':
                          gstring = g_string_append_c (gstring, '\f');
                          break;
                        /* \uXXXX; when no hex digit follows, the escape is
                         * dropped without appending anything. */
                        case 'u':
                          fchar = json_scanner_peek_next_char (scanner);
                          if (is_hex_digit (fchar))
                            {
                              gunichar ucs;

                              ucs = json_scanner_get_unichar (scanner, line_p, position_p);

                              if (g_unichar_type (ucs) == G_UNICODE_SURROGATE)
                                {
                                  /* read next surrogate */
                                  if ('\\' == json_scanner_get_char (scanner, line_p, position_p)
                                      && 'u' == json_scanner_get_char (scanner, line_p, position_p))
                                    {
                                      gunichar ucs_lo = json_scanner_get_unichar (scanner, line_p, position_p);
                                      /* NOTE(review): asserts on input data; a
                                       * surrogate followed by a non-surrogate
                                       * \u escape trips this when assertions
                                       * are enabled. */
                                      g_assert (g_unichar_type (ucs_lo) == G_UNICODE_SURROGATE);

                                      /* Combine the low 10 bits of each half
                                       * into one code point above 0xFFFF. */
                                      ucs = (((ucs & 0x3ff) << 10) | (ucs_lo & 0x3ff)) + 0x10000;
                                    }
                                }

                              /* NOTE(review): asserts on input data; if the
                               * surrogate was not followed by "\u", ucs is
                               * still the unpaired surrogate here, which
                               * presumably fails validation - confirm. */
                              g_assert (g_unichar_validate (ucs));
                              gstring = g_string_append_unichar (gstring, ucs);
                            }
                          break;

                        /* Octal escape of one to three digits. */
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                          i = ch - '0';
                          fchar = json_scanner_peek_next_char (scanner);
                          if (fchar >= '0' && fchar <= '7')
                            {
                              ch = json_scanner_get_char (scanner, line_p, position_p);
                              i = i * 8 + ch - '0';
                              fchar = json_scanner_peek_next_char (scanner);
                              if (fchar >= '0' && fchar <= '7')
                                {
                                  ch = json_scanner_get_char (scanner, line_p, position_p);
                                  i = i * 8 + ch - '0';
                                }
                            }
                          gstring = g_string_append_c (gstring, i);
                          break;

                        /* Any other escaped character stands for itself. */
                        default:
                          gstring = g_string_append_c (gstring, ch);
                          break;
                        }
                    }
                  else
                    gstring = g_string_append_c (gstring, ch);
                }
            }
          ch = 0;
          break;

        /* Leading '.': float written without an integer part; the buffer
         * is seeded with "0." in number_parsing. */
        case '.':
          if (!config->scan_float)
            goto default_case;
          token = G_TOKEN_FLOAT;
          dotted_float = TRUE;
          ch = json_scanner_get_char (scanner, line_p, position_p);
          goto number_parsing;

        /* '$' prefix for hexadecimal numbers. */
        case '$':
          if (!config->scan_hex_dollar)
            goto default_case;
          token = G_TOKEN_HEX;
          ch = json_scanner_get_char (scanner, line_p, position_p);
          goto number_parsing;

        /* Leading '0': may introduce a 0x / 0b prefix, otherwise octal or
         * decimal depending on the configuration. */
        case '0':
          if (config->scan_octal)
            token = G_TOKEN_OCTAL;
          else
            token = G_TOKEN_INT;
          ch = json_scanner_peek_next_char (scanner);
          if (config->scan_hex && (ch == 'x' || ch == 'X'))
            {
              token = G_TOKEN_HEX;
              json_scanner_get_char (scanner, line_p, position_p);
              ch = json_scanner_get_char (scanner, line_p, position_p);
              if (ch == 0)
                {
                  token = G_TOKEN_ERROR;
                  value.v_error = G_ERR_UNEXP_EOF;
                  (*position_p)++;
                  break;
                }
              if (json_scanner_char_2_num (ch, 16) < 0)
                {
                  token = G_TOKEN_ERROR;
                  value.v_error = G_ERR_DIGIT_RADIX;
                  ch = 0;
                  break;
                }
            }
          else if (config->scan_binary && (ch == 'b' || ch == 'B'))
            {
              token = G_TOKEN_BINARY;
              json_scanner_get_char (scanner, line_p, position_p);
              ch = json_scanner_get_char (scanner, line_p, position_p);
              if (ch == 0)
                {
                  token = G_TOKEN_ERROR;
                  value.v_error = G_ERR_UNEXP_EOF;
                  (*position_p)++;
                  break;
                }
              /* NOTE(review): the first binary digit is checked against
               * radix 10, not 2 - confirm this is intended. */
              if (json_scanner_char_2_num (ch, 10) < 0)
                {
                  token = G_TOKEN_ERROR;
                  value.v_error = G_ERR_NON_DIGIT_IN_CONST;
                  ch = 0;
                  break;
                }
            }
          else
            ch = '0';
          /* fall through */
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        number_parsing:
          {
            gboolean in_number = TRUE;
            gchar *endptr;

            if (token == G_TOKEN_NONE)
              token = G_TOKEN_INT;

            gstring = g_string_new (dotted_float ? "0." : "");
            gstring = g_string_append_c (gstring, ch);

            /* Collect the characters of the number, refining the token type
             * (e.g. INT -> FLOAT) as '.', 'e' or 'E' are seen. */
            do /* while (in_number) */
              {
                gboolean is_E;

                /* A sign is only accepted directly after the exponent marker
                 * of a float. */
                is_E = token == G_TOKEN_FLOAT && (ch == 'e' || ch == 'E');

                ch = json_scanner_peek_next_char (scanner);

                if (json_scanner_char_2_num (ch, 36) >= 0 ||
                    (config->scan_float && ch == '.') ||
                    (is_E && (ch == '+' || ch == '-')))
                  {
                    ch = json_scanner_get_char (scanner, line_p, position_p);

                    switch (ch)
                      {
                      case '.':
                        if (token != G_TOKEN_INT && token != G_TOKEN_OCTAL)
                          {
                            value.v_error = token == G_TOKEN_FLOAT ? G_ERR_FLOAT_MALFORMED : G_ERR_FLOAT_RADIX;
                            token = G_TOKEN_ERROR;
                            in_number = FALSE;
                          }
                        else
                          {
                            token = G_TOKEN_FLOAT;
                            gstring = g_string_append_c (gstring, ch);
                          }
                        break;

                      case '0':
                      case '1':
                      case '2':
                      case '3':
                      case '4':
                      case '5':
                      case '6':
                      case '7':
                      case '8':
                      case '9':
                        gstring = g_string_append_c (gstring, ch);
                        break;

                      case '-':
                      case '+':
                        if (token != G_TOKEN_FLOAT)
                          {
                            token = G_TOKEN_ERROR;
                            value.v_error = G_ERR_NON_DIGIT_IN_CONST;
                            in_number = FALSE;
                          }
                        else
                          gstring = g_string_append_c (gstring, ch);
                        break;

                      /* 'e'/'E' is a digit in hex constants and the exponent
                       * marker otherwise. */
                      case 'e':
                      case 'E':
                        if ((token != G_TOKEN_HEX && !config->scan_float) ||
                            (token != G_TOKEN_HEX &&
                             token != G_TOKEN_OCTAL &&
                             token != G_TOKEN_FLOAT &&
                             token != G_TOKEN_INT))
                          {
                            token = G_TOKEN_ERROR;
                            value.v_error = G_ERR_NON_DIGIT_IN_CONST;
                            in_number = FALSE;
                          }
                        else
                          {
                            if (token != G_TOKEN_HEX)
                              token = G_TOKEN_FLOAT;
                            gstring = g_string_append_c (gstring, ch);
                          }
                        break;

                      /* Remaining alphanumerics are only accepted inside hex
                       * constants. */
                      default:
                        if (token != G_TOKEN_HEX)
                          {
                            token = G_TOKEN_ERROR;
                            value.v_error = G_ERR_NON_DIGIT_IN_CONST;
                            in_number = FALSE;
                          }
                        else
                          gstring = g_string_append_c (gstring, ch);
                        break;
                      }
                  }
                else
                  in_number = FALSE;
              }
            while (in_number);

            /* Convert the collected text according to the final token type. */
            endptr = NULL;
            if (token == G_TOKEN_FLOAT)
              value.v_float = g_strtod (gstring->str, &endptr);
            else
              {
                guint64 ui64 = 0;

                switch (token)
                  {
                  case G_TOKEN_BINARY:
                    ui64 = g_ascii_strtoull (gstring->str, &endptr, 2);
                    break;
                  case G_TOKEN_OCTAL:
                    ui64 = g_ascii_strtoull (gstring->str, &endptr, 8);
                    break;
                  case G_TOKEN_INT:
                    ui64 = g_ascii_strtoull (gstring->str, &endptr, 10);
                    break;
                  case G_TOKEN_HEX:
                    ui64 = g_ascii_strtoull (gstring->str, &endptr, 16);
                    break;
                  default: ;
                  }
                if (scanner->config->store_int64)
                  value.v_int64 = ui64;
                else
                  value.v_int = ui64;
              }

            /* Leftover characters mean the text was not a valid constant
             * for the chosen radix. */
            if (endptr && *endptr)
              {
                token = G_TOKEN_ERROR;
                if (*endptr == 'e' || *endptr == 'E')
                  value.v_error = G_ERR_NON_DIGIT_IN_CONST;
                else
                  value.v_error = G_ERR_DIGIT_RADIX;
              }
            g_string_free (gstring, TRUE);
            gstring = NULL;
            ch = 0;
          } /* number_parsing:... */
          break;

        default:
        default_case:
          {
            /* Single-line comment, delimited by the two characters of
             * config->cpair_comment_single. */
            if (config->cpair_comment_single &&
                ch == config->cpair_comment_single[0])
              {
                token = G_TOKEN_COMMENT_SINGLE;
                in_comment_single = TRUE;
                gstring = g_string_new (NULL);
                ch = json_scanner_get_char (scanner, line_p, position_p);
                while (ch != 0)
                  {
                    if (ch == config->cpair_comment_single[1])
                      {
                        in_comment_single = FALSE;
                        ch = 0;
                        break;
                      }

                    gstring = g_string_append_c (gstring, ch);
                    ch = json_scanner_get_char (scanner, line_p, position_p);
                  }
                /* ignore a missing newline at EOF for single line comments */
                if (in_comment_single &&
                    config->cpair_comment_single[1] == '\n')
                  in_comment_single = FALSE;
              }
            else if (config->scan_identifier && ch &&
                     strchr (config->cset_identifier_first, ch))
              {
              /* Also reached by the goto near the top of the loop. */
              identifier_precedence:

                if (config->cset_identifier_nth && ch &&
                    strchr (config->cset_identifier_nth,
                            json_scanner_peek_next_char (scanner)))
                  {
                    token = G_TOKEN_IDENTIFIER;
                    gstring = g_string_new (NULL);
                    gstring = g_string_append_c (gstring, ch);
                    do
                      {
                        ch = json_scanner_get_char (scanner, line_p, position_p);
                        gstring = g_string_append_c (gstring, ch);
                        ch = json_scanner_peek_next_char (scanner);
                      }
                    while (ch && strchr (config->cset_identifier_nth, ch));
                    ch = 0;
                  }
                else if (config->scan_identifier_1char)
                  {
                    token = G_TOKEN_IDENTIFIER;
                    value.v_identifier = g_new0 (gchar, 2);
                    value.v_identifier[0] = ch;
                    ch = 0;
                  }
              }
            /* Anything not consumed above is returned as a single character. */
            if (ch)
              {
                if (config->char_2_token)
                  token = ch;
                else
                  {
                    token = G_TOKEN_CHAR;
                    value.v_char = ch;
                  }
                ch = 0;
              }
          } /* default_case:... */
          break;
        }
      g_assert (ch == 0 && token != G_TOKEN_NONE); /* paranoid */
    }
  while (ch != 0);

  /* A comment or string that was still open when the input ended. */
  if (in_comment_multi || in_comment_single ||
      in_string_sq || in_string_dq)
    {
      token = G_TOKEN_ERROR;
      if (gstring)
        {
          g_string_free (gstring, TRUE);
          gstring = NULL;
        }
      (*position_p)++;
      if (in_comment_multi || in_comment_single)
        value.v_error = G_ERR_UNEXP_EOF_IN_COMMENT;
      else /* (in_string_sq || in_string_dq) */
        value.v_error = G_ERR_UNEXP_EOF_IN_STRING;
    }

  /* Hand the collected characters over to the token value; only the GString
   * wrapper is freed here, the character data lives on in value.v_string. */
  if (gstring)
    {
      value.v_string = g_string_free (gstring, FALSE);
      gstring = NULL;
    }

  /* NOTE(review): identifiers are read back through value.v_identifier
   * although multi-character ones were stored via value.v_string above;
   * presumably GTokenValue is a union - confirm. */
  if (token == G_TOKEN_IDENTIFIER)
    {
      if (config->scan_symbols)
        {
          JsonScannerKey *key;
          guint scope_id;

          scope_id = scanner->scope_id;
          key = json_scanner_lookup_internal (scanner, scope_id, value.v_identifier);
          /* Retry in scope 0 when the current scope has no such symbol. */
          if (!key && scope_id && scanner->config->scope_0_fallback)
            key = json_scanner_lookup_internal (scanner, 0, value.v_identifier);

          if (key)
            {
              g_free (value.v_identifier);
              token = G_TOKEN_SYMBOL;
              value.v_symbol = key->value;
            }
        }

      /* Optionally recognise the identifier NULL: exact upper case when the
       * scanner is case sensitive, any mix of case otherwise. */
      if (token == G_TOKEN_IDENTIFIER &&
          config->scan_identifier_NULL &&
          strlen (value.v_identifier) == 4)
        {
          gchar *null_upper = "NULL";
          gchar *null_lower = "null";

          if (scanner->config->case_sensitive)
            {
              if (value.v_identifier[0] == null_upper[0] &&
                  value.v_identifier[1] == null_upper[1] &&
                  value.v_identifier[2] == null_upper[2] &&
                  value.v_identifier[3] == null_upper[3])
                token = G_TOKEN_IDENTIFIER_NULL;
            }
          else
            {
              if ((value.v_identifier[0] == null_upper[0] ||
                   value.v_identifier[0] == null_lower[0]) &&
                  (value.v_identifier[1] == null_upper[1] ||
                   value.v_identifier[1] == null_lower[1]) &&
                  (value.v_identifier[2] == null_upper[2] ||
                   value.v_identifier[2] == null_lower[2]) &&
                  (value.v_identifier[3] == null_upper[3] ||
                   value.v_identifier[3] == null_lower[3]))
                token = G_TOKEN_IDENTIFIER_NULL;
            }
        }
    }

  *token_p = token;
  *value_p = value;
}
static void syriac_engine_shape (PangoEngineShape *engine, PangoFont *font, const char *text, gint length, const PangoAnalysis *analysis, PangoGlyphString *glyphs) { PangoFcFont *fc_font; FT_Face face; PangoOTRulesetDescription desc; const PangoOTRuleset *ruleset; PangoOTBuffer *buffer; gulong *properties = NULL; glong n_chars; gunichar *wcs; const char *p; int cluster = 0; int i; g_return_if_fail (font != NULL); g_return_if_fail (text != NULL); g_return_if_fail (length >= 0); g_return_if_fail (analysis != NULL); fc_font = PANGO_FC_FONT (font); face = pango_fc_font_lock_face (fc_font); if (!face) return; buffer = pango_ot_buffer_new (fc_font); pango_ot_buffer_set_rtl (buffer, analysis->level % 2 != 0); pango_ot_buffer_set_zero_width_marks (buffer, TRUE); wcs = g_utf8_to_ucs4_fast (text, length, &n_chars); properties = g_new0 (gulong, n_chars); syriac_assign_properties (wcs, properties, n_chars); g_free (wcs); p = text; for (i=0; i < n_chars; i++) { gunichar wc; PangoGlyph glyph; wc = g_utf8_get_char (p); if (g_unichar_type (wc) != G_UNICODE_NON_SPACING_MARK) cluster = p - text; if (pango_is_zero_width (wc)) glyph = PANGO_GLYPH_EMPTY; else { gunichar c = wc; if (analysis->level % 2) g_unichar_get_mirror_char (c, &c); glyph = pango_fc_font_get_glyph (fc_font, c); } if (!glyph) glyph = PANGO_GET_UNKNOWN_GLYPH (wc); pango_ot_buffer_add_glyph (buffer, glyph, properties[i], cluster); p = g_utf8_next_char (p); } g_free (properties); desc.script = analysis->script; desc.language = analysis->language; desc.n_static_gsub_features = G_N_ELEMENTS (gsub_features); desc.static_gsub_features = gsub_features; desc.n_static_gpos_features = G_N_ELEMENTS (gpos_features); desc.static_gpos_features = gpos_features; /* TODO populate other_features from analysis->extra_attrs */ desc.n_other_features = 0; desc.other_features = NULL; ruleset = pango_ot_ruleset_get_for_description (pango_ot_info_get (face), &desc); pango_ot_ruleset_substitute (ruleset, buffer); pango_ot_ruleset_position 
(ruleset, buffer); pango_ot_buffer_output (buffer, glyphs); pango_ot_buffer_destroy (buffer); pango_fc_font_unlock_face (fc_font); }
/** Sets the value of the property by parsing str. Note: this should
 * only be called once on an instance of TransProperty, as calling it
 * more than once can cause memory leaks.
 *
 * For date properties the value is allocated even when parsing fails.
 * For amount properties (balance, deposit, withdrawal) the first
 * currency symbol is stripped before the number is parsed; an amount
 * whose magnitude is at most 0.00001 is accepted (TRUE is returned) but
 * prop->value is left untouched.
 *
 * @param prop The property being set
 * @param str The string to be parsed
 * @return TRUE on success, FALSE on failure
 */
static gboolean trans_property_set(TransProperty* prop, char* str)
{
    char *endptr, *possible_currency_symbol, *str_dupe;
    double value;

    switch (prop->type)
    {
    case GNC_CSV_DATE:
        prop->value = g_new(time_t, 1);
        *((time_t*)(prop->value)) = parse_date(str, prop->list->date_format);
        return *((time_t*)(prop->value)) != -1;

    case GNC_CSV_DESCRIPTION:
    case GNC_CSV_NUM:
        prop->value = g_strdup(str);
        return TRUE;

    case GNC_CSV_BALANCE:
    case GNC_CSV_DEPOSIT:
    case GNC_CSV_WITHDRAWAL:
        /* First, we make a copy so we can't mess up real data. */
        str_dupe = g_strdup(str);

        /* Go through str_dupe looking for currency symbols. */
        for (possible_currency_symbol = str_dupe; *possible_currency_symbol;
                possible_currency_symbol = g_utf8_next_char(possible_currency_symbol))
        {
            if (g_unichar_type(g_utf8_get_char(possible_currency_symbol)) == G_UNICODE_CURRENCY_SYMBOL)
            {
                /* If we find a currency symbol, save the position just ahead
                 * of the currency symbol (next_symbol), and find the null
                 * terminator of the string (last_symbol). */
                char *next_symbol = g_utf8_next_char(possible_currency_symbol);
                char *last_symbol = next_symbol;

                while (*last_symbol)
                    last_symbol = g_utf8_next_char(last_symbol);

                /* Move all of the string (including the null byte, which is
                 * why we have +1 in the size parameter) following the
                 * currency symbol back one character, thereby overwriting the
                 * currency symbol. */
                memmove(possible_currency_symbol, next_symbol,
                        last_symbol - next_symbol + 1);
                break;
            }
        }

        /* Translate the string (now clean of currency symbols) into a number. */
        value = strtod(str_dupe, &endptr);

        /* If this isn't a valid numeric string, this is an error. */
        if (endptr != str_dupe + strlen(str_dupe))
        {
            g_free(str_dupe);
            return FALSE;
        }

        g_free(str_dupe);

        /* Compare as floating point. The integer abs() used here before
         * truncated the amount first, so every value with a magnitude
         * below 1.0 (e.g. 0.50) was treated as zero and dropped. */
        if (value > 0.00001 || value < -0.00001)
        {
            prop->value = g_new(gnc_numeric, 1);
            *((gnc_numeric*)(prop->value)) =
                double_to_gnc_numeric(value, xaccAccountGetCommoditySCU(prop->list->account),
                                      GNC_RND_ROUND);
        }
        return TRUE;
    }
    return FALSE; /* We should never actually get here. */
}
/*
 * log_attr_foreach:
 * Walks the UTF-8 string @text one character at a time and calls @func for
 * each character, passing the previous/current/next characters with their
 * Unicode types, plus pointers to the matching entries of @attrs (NULL for
 * the previous entry at the first character and for the next entry at the
 * last one).
 *
 * While walking, the file-level bookkeeping variables (offset, line,
 * line_start, line_end, current_wc) are kept up to date; current_wc and
 * line_end are set before @func is invoked.
 *
 * Does nothing for an empty string.
 */
static void
log_attr_foreach (const char     *text,
                  PangoLogAttr   *attrs,
                  CharForeachFunc func,
                  gpointer        data)
{
  gint n_bytes = strlen (text);
  const gchar *limit = text + n_bytes;
  const gchar *cursor = text;
  gint idx = 0;
  gunichar prev_wc = 0;
  gunichar next_wc;
  GUnicodeType prev_type = (GUnicodeType) -1;   /* "no previous character" */
  GUnicodeType next_type;

  if (cursor == limit)
    return;

  offset = 0;
  line = 1;

  next_wc = g_utf8_get_char (cursor);
  next_type = g_unichar_type (next_wc);

  line_start = text;
  line_end = text;

  while (next_wc != 0)
    {
      const gunichar wc = next_wc;
      const GUnicodeType type = next_type;
      PangoLogAttr *prev_attr;
      PangoLogAttr *next_attr;

      current_wc = wc;

      cursor = g_utf8_next_char (cursor);
      line_end = cursor;

      /* Look one character ahead; 0 marks the end of the text. next_type
       * keeps its old value in that case. */
      next_wc = (cursor < limit) ? g_utf8_get_char (cursor) : 0;
      if (next_wc != 0)
        next_type = g_unichar_type (next_wc);

      prev_attr = (idx != 0) ? &attrs[idx - 1] : NULL;
      next_attr = (next_wc != 0) ? &attrs[idx + 1] : NULL;

      func (wc, prev_wc, next_wc,
            type, prev_type, next_type,
            &attrs[idx], prev_attr, next_attr,
            data);

      prev_wc = wc;
      prev_type = type;
      idx++;
      offset++;

      /* A newline starts a fresh line for the bookkeeping variables. */
      if (wc == '\n')
        {
          line++;
          offset = 0;
          line_start = cursor;
          line_end = cursor;
        }
    }
}
int main(int argc, char *argv[]) { // Set locale required for Glib-2.0: setlocale(LC_ALL, ""); GError *error = NULL; GOptionContext *context; // Process command line arguments. context = g_option_context_new(" - convert fortune file to omikuji file"); g_option_context_add_main_entries(context, options, NULL); if (!g_option_context_parse(context, &argc, &argv, &error)) { g_printerr("%s\n", error->message); return 1; } else if (error != NULL) { g_printerr("%s\n", error->message); g_clear_error(&error); } if (inputEncoding == NULL) g_get_charset(&inputEncoding); GRegex *regex = compileRegularExpression(&error); if (regex == NULL) { g_printerr("%s\n", error->message); return 2; } GList *comments = NULL, *fortunes = NULL; gchar *buf; // Process input files. for (gint i = 1; i < argc; i++) { buf = slurpfile(argv[i], &error); if (buf == NULL) { if (error->domain == G_FILE_ERROR) g_printerr("%s\n", error->message); else g_printerr("Failed to parse %s: %s\n", argv[i], error->message); if (outputFilename) return 3; else { g_clear_error(&error); continue; } } GMatchInfo *match_info; gint start = 0, end = 0, mstart, mend; gboolean isComment = FALSE; if (g_regex_match(regex, buf, 0, &match_info)) { while (g_match_info_matches(match_info)) { if (g_match_info_fetch_pos(match_info, 0, &mstart, &mend)) { if (start == 0) { start = mend; gunichar next = g_utf8_get_char(&buf[mend]); if (g_unichar_isspace(next)) { start++; if (g_unichar_type(next) == G_UNICODE_SPACE_SEPARATOR) isComment = TRUE; else isComment = FALSE; } else isComment = TRUE; if (!g_match_info_next(match_info, &error)) g_clear_error(&error); } else { if ((isComment && !skipComments) || !isComment) { end = mstart; GString *string = g_string_sized_new(end - start); for (gint j = start; j < end; j++) g_string_append_c(string, buf[j]); g_string_append_c(string, '\0'); if (isComment) comments = g_list_append(comments, string); else fortunes = g_list_append(fortunes, string); } start = 0; end = 0; } } } 
g_match_info_free(match_info); if (outputFilename == NULL) { outputFilename = g_path_get_basename(argv[i]); if (outputFilename) { gpointer temp = g_try_realloc(outputFilename, strlen(outputFilename) + 5); if (temp) { gboolean delStr = TRUE; outputFilename = (gchar *) temp; strcat(outputFilename, ".omi"); writeOmikujiFile(outputFilename, comments, fortunes); if (comments) { g_list_foreach(comments, stringDelete, &delStr); g_list_free(comments); comments = NULL; } g_list_foreach(fortunes, stringDelete, &delStr); g_list_free(fortunes); fortunes = NULL; } g_free(outputFilename); outputFilename = NULL; } } } g_free(buf); } g_regex_unref(regex); if (outputFilename) { gboolean delStr = TRUE; writeOmikujiFile(outputFilename, comments, fortunes); if (comments) { g_list_foreach(comments, stringDelete, &delStr); g_list_free(comments); comments = NULL; } g_list_foreach(fortunes, stringDelete, &delStr); g_list_free(fortunes); fortunes = NULL; } return 0; }
/** Sets the value of the property by parsing str. Note: this should
 * only be called once on an instance of TransProperty, as calling it
 * more than once can cause memory leaks.
 *
 * For date properties the value is allocated even when parsing fails.
 * For amount properties (balance, deposit, withdrawal) a string without
 * any digit is treated as "0", the first currency symbol is stripped,
 * and the remainder is parsed according to prop->list->currency_format.
 *
 * @param prop The property being set
 * @param str The string to be parsed
 * @return TRUE on success, FALSE on failure (including an unknown
 * currency format)
 */
static gboolean trans_property_set (TransProperty* prop, char* str)
{
    char *endptr, *possible_currency_symbol, *str_dupe;
    gnc_numeric val;
    int reti;
    regex_t regex;

    switch (prop->type)
    {
    case GNC_CSV_DATE:
        prop->value = g_new(time64, 1);
        *((time64*)(prop->value)) = parse_date(str, prop->list->date_format);
        return *((time64*)(prop->value)) != -1;

    case GNC_CSV_DESCRIPTION:
    case GNC_CSV_NOTES:
    case GNC_CSV_NUM:
        prop->value = g_strdup (str);
        return TRUE;

    case GNC_CSV_BALANCE:
    case GNC_CSV_DEPOSIT:
    case GNC_CSV_WITHDRAWAL:
        /* First, we make a copy so we can't mess up real data. */
        str_dupe = g_strdup (str);

        /* If a cell is empty or just spaces make its value = "0".
         * The compiled pattern is released again right away; if it cannot
         * be compiled the cell is left as it is. */
        if (regcomp (&regex, "[0-9]", 0) == 0)
        {
            reti = regexec (&regex, str_dupe, 0, NULL, 0);
            regfree (&regex);
            if (reti == REG_NOMATCH)
            {
                g_free (str_dupe);
                str_dupe = g_strdup ("0");
            }
        }

        /* Go through str_dupe looking for currency symbols. */
        for (possible_currency_symbol = str_dupe; *possible_currency_symbol;
                possible_currency_symbol = g_utf8_next_char (possible_currency_symbol))
        {
            if (g_unichar_type (g_utf8_get_char (possible_currency_symbol)) == G_UNICODE_CURRENCY_SYMBOL)
            {
                /* If we find a currency symbol, save the position just ahead
                 * of the currency symbol (next_symbol), and find the null
                 * terminator of the string (last_symbol). */
                char *next_symbol = g_utf8_next_char (possible_currency_symbol);
                char *last_symbol = next_symbol;

                while (*last_symbol)
                    last_symbol = g_utf8_next_char (last_symbol);

                /* Move all of the string (including the null byte, which is
                 * why we have +1 in the size parameter) following the
                 * currency symbol back one character, thereby overwriting the
                 * currency symbol. */
                memmove (possible_currency_symbol, next_symbol,
                         last_symbol - next_symbol + 1);
                break;
            }
        }

        /* Currency format */
        switch (prop->list->currency_format)
        {
        case 0:
            /* Currency locale */
            if (!(xaccParseAmount (str_dupe, TRUE, &val, &endptr)))
            {
                g_free (str_dupe);
                return FALSE;
            }
            break;
        case 1:
            /* Currency decimal period */
            if (!(xaccParseAmountExtended (str_dupe, TRUE, '-', '.', ',', "\003\003", "$+", &val, &endptr)))
            {
                g_free (str_dupe);
                return FALSE;
            }
            break;
        case 2:
            /* Currency decimal comma */
            if (!(xaccParseAmountExtended (str_dupe, TRUE, '-', ',', '.', "\003\003", "$+", &val, &endptr)))
            {
                g_free (str_dupe);
                return FALSE;
            }
            break;
        default:
            /* Unknown format: nothing was parsed, so val must not be stored. */
            g_free (str_dupe);
            return FALSE;
        }

        prop->value = g_new (gnc_numeric, 1);
        *((gnc_numeric*)(prop->value)) = val;
        g_free (str_dupe);
        return TRUE;
    }
    return FALSE; /* We should never actually get here. */
}
gchar* rb_search_fold (const char *original) { GString *string; gchar *normalized; gunichar *unicode, *cur; g_return_val_if_fail (original != NULL, NULL); /* old behaviour is equivalent to: return g_utf8_casefold (original, -1); */ string = g_string_new (NULL); normalized = g_utf8_normalize(original, -1, G_NORMALIZE_DEFAULT); unicode = g_utf8_to_ucs4_fast (normalized, -1, NULL); for (cur = unicode; *cur != 0; cur++) { switch (g_unichar_type (*cur)) { case G_UNICODE_COMBINING_MARK: case G_UNICODE_ENCLOSING_MARK: case G_UNICODE_NON_SPACING_MARK: case G_UNICODE_CONNECT_PUNCTUATION: case G_UNICODE_DASH_PUNCTUATION: case G_UNICODE_CLOSE_PUNCTUATION: case G_UNICODE_FINAL_PUNCTUATION: case G_UNICODE_INITIAL_PUNCTUATION: case G_UNICODE_OTHER_PUNCTUATION: case G_UNICODE_OPEN_PUNCTUATION: /* remove these */ break; case G_UNICODE_LOWERCASE_LETTER: case G_UNICODE_MODIFIER_LETTER: case G_UNICODE_OTHER_LETTER: case G_UNICODE_TITLECASE_LETTER: case G_UNICODE_UPPERCASE_LETTER: /* convert to lower case */ *cur = g_unichar_tolower (*cur); /* ... and fall through */\ case G_UNICODE_DECIMAL_NUMBER: case G_UNICODE_LETTER_NUMBER: case G_UNICODE_OTHER_NUMBER: /* should be keep symbols? */ case G_UNICODE_CURRENCY_SYMBOL: case G_UNICODE_MODIFIER_SYMBOL: case G_UNICODE_MATH_SYMBOL: case G_UNICODE_OTHER_SYMBOL: g_string_append_unichar (string, *cur); break; case G_UNICODE_UNASSIGNED: rb_debug ("unassigned unicode character type found"); /* fall through */ default: /* leave these in */ g_string_append_unichar (string, *cur); } } g_free (unicode); g_free (normalized); return g_string_free (string, FALSE); }