/*
 * Advance the lexer by one byte and keep the column counters in step with
 * whole characters rather than bytes.
 *
 * ls->current receives the next byte of the stream, or '\0' once
 * ls->stream has reached ls->stream_end. The file-level counter `lastbytes`
 * carries, across calls, how many bytes of the current multi-byte sequence
 * are still to be consumed; the column is bumped only on the call that
 * consumes the last byte of a sequence.
 */
static void
next_char(Eo_Lexer *ls)
{
   Eina_Bool at_end = EINA_FALSE;
   int pending;

   if (ls->stream != ls->stream_end)
     {
        ls->current = *ls->stream;
        ls->stream++;
     }
   else
     {
        ls->current = '\0';
        at_end = EINA_TRUE;
     }

   pending = lastbytes;
   if (pending == 0)
     {
        if (at_end)
          {
             /* End of input counts as a single-byte character. */
             pending = 1;
          }
        else
          {
             /* Start of a new character: ask how many bytes it spans.
              * Assumes eina_unicode_utf8_next_get() advances the index by
              * the byte length of the sequence -- TODO confirm. */
             eina_unicode_utf8_next_get(ls->stream - 1, &pending);
          }
     }

   if (pending != 1)
     {
        /* Still inside a multi-byte sequence: one byte fewer to go. */
        lastbytes = pending - 1;
        return;
     }

   /* Last (or only) byte of the character: move one column forward. */
   ls->icolumn++;
   ls->column = ls->icolumn;
   lastbytes = 0;
}
/**
 * Return the code point found at *iindex in buf and move *iindex back to
 * the start of the preceding UTF-8 sequence.
 *
 * @param buf    UTF-8 buffer; must not be NULL.
 * @param iindex in/out byte index; must not be NULL. Left untouched when it
 *               is <= 0, since there is nothing before the first byte.
 * @return the value eina_unicode_utf8_next_get() yields at the incoming
 *         index, or 0 when either argument is NULL.
 */
EAPI Eina_Unicode
eina_unicode_utf8_get_prev(const char *buf, int *iindex)
{
   int cur, pos;

   EINA_SAFETY_ON_NULL_RETURN_VAL(buf, 0);
   EINA_SAFETY_ON_NULL_RETURN_VAL(iindex, 0);

   /* Decode the current character on a scratch index so that the caller's
    * index is only ever moved backwards below. */
   pos = *iindex;
   cur = eina_unicode_utf8_next_get(buf, &pos);

   if (*iindex > 0)
     {
        /* Walk back over continuation bytes (bit pattern 10xxxxxx) until a
         * lead byte or the start of the buffer is reached. */
        for (pos = *iindex - 1; pos > 0; pos--)
          {
             if ((buf[pos] & 0xc0) != 0x80) break;
          }
        *iindex = pos;
     }

   return cur;
}
/*
 * Append the UTF-8 string `text` to `sbuf`, converting it to markup.
 *
 * Line breaks, tabs and paragraph separators become their markup tags,
 * markup-significant characters ('<', '>', '&', '"') are written as entity
 * references so they cannot be mistaken for markup, carriage returns are
 * dropped, and every other character is copied through byte-for-byte.
 *
 * Iteration stops at the first character for which
 * eina_unicode_utf8_next_get() yields a value <= 0, or when the running
 * index is not positive.
 */
static void
_markup_get_text_utf8_append(Eina_Strbuf *sbuf, const char *text)
{
   int ch, pos = 0, pos2 = 0;

   for (;;)
     {
        /* pos marks the start of the character, pos2 the byte after it. */
        pos = pos2;
        ch = eina_unicode_utf8_next_get(text, &pos2);
        if ((ch <= 0) || (pos2 <= 0)) break;

        if (ch == _NEWLINE)
          eina_strbuf_append(sbuf, "<br/>");
        else if (ch == _TAB)
          eina_strbuf_append(sbuf, "<tab/>");
        else if (ch == '<')
          eina_strbuf_append(sbuf, "&lt;");
        else if (ch == '>')
          eina_strbuf_append(sbuf, "&gt;");
        else if (ch == '&')
          eina_strbuf_append(sbuf, "&amp;");
        else if (ch == '"')
          eina_strbuf_append(sbuf, "&quot;");
        else if (ch == _PARAGRAPH_SEPARATOR)
          eina_strbuf_append(sbuf, "<ps/>");
        else if (ch == _REPLACEMENT_CHAR)
          {
             /* NOTE(review): this appends an empty string, which silently
              * drops the character. The literal may originally have been a
              * numeric character reference for _REPLACEMENT_CHAR -- confirm
              * against the project history before changing it. */
             eina_strbuf_append(sbuf, "");
          }
        else if (ch != '\r')
          {
             /* Ordinary character: copy its raw bytes unchanged. */
             eina_strbuf_append_length(sbuf, text + pos, pos2 - pos);
          }
     }
}
/*
 * Convert a character offset into `str` to the corresponding byte index.
 *
 * Calls eina_unicode_utf8_next_get() `offset` times on a running index that
 * starts at 0 and returns where that index ends up. A non-positive offset
 * yields 0.
 */
static unsigned int
_ecore_imf_xim_utf8_offset_to_index(const char *str, int offset)
{
   int byte_pos = 0;
   int remaining = offset;

   while (remaining > 0)
     {
        eina_unicode_utf8_next_get(str, &byte_pos);
        remaining--;
     }

   return byte_pos;
}
/**
 * Count the UTF-8 characters (not bytes) in buf.
 *
 * @param buf UTF-8 string; must not be NULL.
 * @return number of characters decoded before
 *         eina_unicode_utf8_next_get() first returns 0; 0 when buf is NULL.
 */
EAPI int
eina_unicode_utf8_get_len(const char *buf)
{
   int pos = 0;
   int count = 0;

   EINA_SAFETY_ON_NULL_RETURN_VAL(buf, 0);

   for (;;)
     {
        if (eina_unicode_utf8_next_get(buf, &pos) == 0) break;
        count++;
     }

   return count;
}
/*
 * Convert a byte offset into `str` to a character count.
 *
 * Decodes characters from the start of `str` for as long as the running
 * byte index is below `offset`, and returns how many were decoded. Stops
 * early if eina_unicode_utf8_next_get() returns 0.
 */
static unsigned int
utf8_offset_to_characters(const char *str, int offset)
{
   unsigned int chars = 0;
   int byte_pos = 0;

   while (byte_pos < offset)
     {
        if (!eina_unicode_utf8_next_get(str, &byte_pos)) break;
        chars++;
     }

   return chars;
}
/**
 * Decode a UTF-8 string into a newly allocated, zero-terminated array of
 * Eina_Unicode code points.
 *
 * @param utf  UTF-8 input; must not be NULL.
 * @param _len optional out parameter; when non-NULL it receives the number
 *             of characters in utf (terminator excluded). It is written
 *             before the allocation, so it is set even if allocation fails.
 * @return buffer obtained from calloc() holding len + 1 elements, or NULL
 *         when utf is NULL or allocation fails. The buffer is handed to the
 *         caller, who releases it with free().
 */
EAPI Eina_Unicode *
eina_unicode_utf8_to_unicode(const char *utf, int *_len)
{
   /* FIXME: Should optimize! (the input is walked twice: once to count,
    * once to decode) */
   int len, i, ind;
   Eina_Unicode *buf, *uind;

   EINA_SAFETY_ON_NULL_RETURN_VAL(utf, NULL);

   len = eina_unicode_utf8_get_len(utf);
   if (_len) *_len = len;

   /* calloc takes (count, element size). Widen before adding the slot for
    * the terminator so the addition cannot overflow int. The zero fill
    * provides the terminating 0 element. */
   buf = calloc((size_t)len + 1, sizeof(*buf));
   if (!buf) return NULL;

   for (i = 0, ind = 0, uind = buf; i < len; i++, uind++)
     {
        *uind = eina_unicode_utf8_next_get(utf, &ind);
     }

   return buf;
}
/*
 * Fuzzy-match the (possibly multi-word) pattern `match` against `str` and
 * return an integer weight.
 *
 * Returns 0 when either argument is NULL or empty, or when the summed
 * per-word weights reach MAX_FUZZ (no match). Otherwise returns the sum of
 * the smallest weight recorded for each word of `match`, plus 10 when
 * `match` and `str` (after leading whitespace is skipped) are not
 * byte-identical. Presumably a smaller positive value is a better match --
 * confirm with callers.
 *
 * Punctuation in `str` is treated as a word separator unless `match` itself
 * contains punctuation. Only the first MAX_WORDS words of `match` are
 * considered.
 *
 * All <ctype.h> calls cast their argument to unsigned char: both strings
 * are UTF-8, and passing a negative plain-char value to isspace(),
 * ispunct() or tolower() is undefined behaviour.
 */
int
evry_fuzzy_match(const char *str, const char *match)
{
   const char *p, *m, *next;
   int sum = 0;

   unsigned int last = 0;
   unsigned int offset = 0;
   unsigned int min = 0;
   unsigned char first = 0;
   /* ignore punctuation */
   unsigned char ip = 1;

   unsigned int cnt = 0;
   /* words in match */
   unsigned int m_num = 0;
   unsigned int m_cnt = 0;
   unsigned int m_min[MAX_WORDS];
   unsigned int m_len = 0;
   unsigned int s_len = 0;

   if (!match || !str || !match[0] || !str[0])
     return 0;

   /* remove white spaces at the beginning */
   for (; (*match != 0) && isspace((unsigned char)*match); match++) ;
   for (; (*str != 0) && isspace((unsigned char)*str); str++) ;

   /* count words in match; every word starts at the worst weight */
   for (m = match; (*m != 0) && (m_num < MAX_WORDS); )
     {
        for (; (*m != 0) && !isspace((unsigned char)*m); m++) ;
        for (; (*m != 0) && isspace((unsigned char)*m); m++) ;
        m_min[m_num++] = MAX_FUZZ;
     }

   /* punctuation in the pattern turns off punctuation-as-separator */
   for (m = match; ip && (*m != 0); m++)
     if (ip && ispunct((unsigned char)*m)) ip = 0;

   m_len = strlen(match);
   s_len = strlen(str);

   /* with less than 3 chars match must be a prefix */
   if (m_len < 3)
     m_len = 0;

   next = str;
   m = match;

   while ((m_cnt < m_num) && (*next != 0))
     {
        int ii;

        /* reset match */
        if (m_cnt == 0)
          m = match;

        /* end of matching */
        if (*m == 0)
          break;

        offset = 0;
        last = 0;
        min = 1;
        first = 0;
        /* m_len = 0; */

        /* match current word of string against current match */
        for (p = next; *next != 0; p++)
          {
             /* new word of string begins */
             if ((*p == 0) || isspace((unsigned char)*p) ||
                 (ip && ispunct((unsigned char)*p)))
               {
                  if (m_cnt < m_num - 1)
                    {
                       /* test next match */
                       for (; (*m != 0) && !isspace((unsigned char)*m); m++) ;
                       for (; (*m != 0) && isspace((unsigned char)*m); m++) ;
                       m_cnt++;
                       break;
                    }
                  else
                    {
                       ii = 0;
                       /* go to next word */
                       /* NOTE(review): ii is not reset between iterations,
                        * so after the first step the decode is requested at
                        * p + ii rather than at p -- confirm this is
                        * intended. The same pattern recurs below. */
                       for (; (*p != 0) &&
                            ((isspace((unsigned char)*p) ||
                              (ip && ispunct((unsigned char)*p))));
                            p += ii)
                         if (!eina_unicode_utf8_next_get(p, &ii)) break;
                       cnt++;
                       next = p;
                       m_cnt = 0;
                       break;
                    }
               }

             /* current char matches? */
             if (tolower((unsigned char)*p) != tolower((unsigned char)*m))
               {
                  /* a miss before the first hit costs 1, afterwards 3 */
                  if (!first)
                    offset += 1;
                  else
                    offset += 3;

                  /* m_len++; */

                  if (offset <= m_len * 3)
                    continue;
               }

             if (min < MAX_FUZZ && offset <= m_len * 3)
               {
                  /* first offset of match in word */
                  if (!first)
                    {
                       first = 1;
                       last = offset;
                    }

                  min += offset + (offset - last) * 5;
                  last = offset;

                  /* try next char of match */
                  ii = 0;
                  if (!eina_unicode_utf8_next_get(m, &ii)) continue;
                  m += ii;
                  if (*m != 0 && !isspace((unsigned char)*m))
                    continue;

                  /* end of match: store min weight of match */
                  /* NOTE(review): cnt and m_cnt are unsigned, so the
                   * difference is never negative and this test only
                   * filters out equality; when cnt < m_cnt the
                   * subtraction wraps. Left unchanged to preserve the
                   * existing scoring. */
                  min += (cnt - m_cnt) > 0 ? (cnt - m_cnt) : 0;

                  if (min < m_min[m_cnt])
                    m_min[m_cnt] = min;
               }
             else
               {
                  ii = 0;
                  /* go to next match */
                  for (; (m[0] && m[ii]) && !isspace((unsigned char)*m); m += ii)
                    if (!eina_unicode_utf8_next_get(m, &ii)) break;
               }

             if (m_cnt < m_num - 1)
               {
                  ii = 0;
                  /* test next match */
                  for (; (m[0] && m[ii]) && !isspace((unsigned char)*m); m += ii)
                    if (!eina_unicode_utf8_next_get(m, &ii)) break;
                  m_cnt++;
                  break;
               }
             else if (*p != 0)
               {
                  ii = 0;
                  /* go to next word: skip the rest of this word ... */
                  for (; (p[0] && (s_len - (p - str) >= ii)) &&
                       !((isspace((unsigned char)*p) ||
                          (ip && ispunct((unsigned char)*p))));
                       p += ii)
                    if (!eina_unicode_utf8_next_get(p, &ii)) break;
                  ii = 0;
                  /* ... then the separators that follow it */
                  for (; (p[0] && (s_len - (p - str) >= ii)) &&
                       ((isspace((unsigned char)*p) ||
                         (ip && ispunct((unsigned char)*p))));
                       p += ii)
                    if (!eina_unicode_utf8_next_get(p, &ii)) break;
                  cnt++;
                  next = p;
                  m_cnt = 0;
                  break;
               }
             else
               {
                  next = p;
                  break;
               }
          }
     }

   /* total weight; any word left at MAX_FUZZ makes the whole match fail */
   for (m_cnt = 0; m_cnt < m_num; m_cnt++)
     {
        sum += m_min[m_cnt];

        if (sum >= MAX_FUZZ)
          {
             sum = 0;
             break;
          }
     }

   if (sum > 0)
     {
        /* exact match ? (strcmp is non-zero when the strings differ) */
        if (strcmp(match, str)) sum += 10;
     }

   return sum;
}