/* This will try to convert a string to utf-8, */ static id3_utf8_t * import_id3_string(bool is_id3v1, const id3_ucs4_t *ucs4) { id3_utf8_t *utf8, *utf8_stripped = NULL; id3_latin1_t *isostr; const char *encoding; bool is_utf8; encoding = config_get_string(CONF_ID3V1_ENCODING, NULL); isostr = id3_ucs4_latin1duplicate(ucs4); if (G_UNLIKELY(!isostr)) { return NULL; } is_utf8 = check_utf8((const unsigned char *)isostr, strlen((const char *)isostr)); if(is_utf8) { utf8 = id3_ucs4_utf8duplicate(ucs4); goto done; } /* use encoding field here? */ if (is_id3v1 && encoding) { utf8 = import_8bit_string(isostr, encoding); } else { utf8 = id3_ucs4_utf8duplicate(ucs4); if(!check_utf8((unsigned char *)utf8, strlen((const char *)utf8)) && encoding) { id3_utf8_t *tmp = import_8bit_string(isostr, encoding); if(tmp) { g_free(utf8); utf8 = tmp; } } } done: if(isostr) g_free(isostr); if(utf8) { utf8_stripped = (id3_utf8_t *)g_strdup(g_strstrip((gchar *)utf8)); g_free(utf8); return utf8_stripped; } return NULL; }
/*
 * Copy src into dest one character at a time, using check_utf8() on the
 * first byte of each character to obtain its byte length.
 *
 * A character is copied only when its length is positive, it leaves room
 * for the terminating NUL in dest, and it does not extend past the end of
 * src.  On the first character that violates this, dest is terminated at
 * the current position and an error is returned.
 *
 * @param src       NUL-terminated input; NULL yields an empty dest
 * @param dest      output buffer
 * @param dest_size capacity of dest in bytes
 * @return number of bytes written (excluding the NUL), 0 when src is NULL,
 *         or CTS_ERR_ARG_INVALID on a rejected character
 */
static int cts_remove_special_char(const char *src, char *dest, int dest_size)
{
	int in;
	int out;
	int width;
	int src_len;
	int fits;

	if (NULL == src) {
		ERR("The parameter(src) is NULL");
		dest[0] = '\0';
		return 0;
	}

	src_len = strlen(src);

	for (in = 0, out = 0; src[in] != '\0'; in += width, out += width) {
		width = check_utf8(src[in]);

		fits = 0 < width
			&& width < dest_size - out
			&& width <= src_len - in;
		if (!fits) {
			ERR("The parameter(src:%s) has invalid character set", src);
			dest[out] = '\0';
			return CTS_ERR_ARG_INVALID;
		}

		memcpy(dest + out, src + in, width);
	}

	dest[out] = '\0';
	return out;
}
/*
 * Copy a phone number from src to dest, dropping unwanted bytes.
 *
 * A leading '+' is kept.  After that, every byte for which check_utf8()
 * reports a length of at most 1 is copied unless check_dirty_number()
 * flags it; characters reported as longer than one byte are skipped
 * entirely.  Copying stops once d_pos reaches dest_size - 2, so output
 * may be truncated.
 *
 * @param src       NUL-terminated input; NULL yields an empty dest
 * @param dest      output buffer, always NUL-terminated on return unless
 *                  dest is NULL or dest_size is not positive
 * @param dest_size capacity of dest in bytes
 * @return number of bytes written to dest, excluding the NUL
 */
int cts_clean_number(const char *src, char *dest, int dest_size)
{
	int s_pos = 0;
	int d_pos = 0;
	int char_type;
	int skipped;

	if (NULL == dest || dest_size <= 0) {
		ERR("The parameter(dest) is invalid");
		return 0;
	}

	if (NULL == src) {
		ERR("The parameter(src) is NULL");
		dest[0] = '\0';
		return 0;
	}

	/* Keep the '+' only when there is room for it and the terminator. */
	if ('+' == src[s_pos] && 2 <= dest_size)
		dest[d_pos++] = src[s_pos++];

	while (src[s_pos] != '\0') {
		if (d_pos >= dest_size - 2)
			break;

		char_type = check_utf8(src[s_pos]);
		if (char_type <= 1) {
			if (!check_dirty_number(src[s_pos]))
				dest[d_pos++] = src[s_pos];
			s_pos++;
			continue;
		}

		/*
		 * Skip the multi-byte character, but never step over the
		 * terminator when the string ends in the middle of it.
		 */
		for (skipped = 0; skipped < char_type && src[s_pos] != '\0'; skipped++)
			s_pos++;
	}

	dest[d_pos] = '\0';
	return d_pos;
}
/*
 * Lua binding that renders a text string with a font.
 *
 * Lua arguments:
 *   1     font userdata (validated by checked_font)
 *   2, 3  x and y position
 *   4     text; rejected with "invalid utf8" if check_utf8() fails
 *   5     size (divided by SCALE before use)
 *   6..   either r, g, b and an optional a (default 1.0), or a userdata /
 *         table that provides a texid() function returning a number
 *
 * Pushes one number: ftglGetFontAdvance() of the text multiplied by the
 * scaled size.
 */
static int font_write(lua_State *L) {
    font_t *font = checked_font(L, 1);
    GLfloat pos_x = luaL_checknumber(L, 2);
    GLfloat pos_y = luaL_checknumber(L, 3);
    const char *text = luaL_checkstring(L, 4);

    // Protect FTGL
    if (!check_utf8(text))
        return luaL_error(L, "invalid utf8");

    GLfloat size = luaL_checknumber(L, 5) / SCALE;

    switch (lua_type(L, 6)) {
    case LUA_TNUMBER: {
        // Plain colour: draw with the default texture bound.
        GLfloat red = luaL_checknumber(L, 6);
        GLfloat green = luaL_checknumber(L, 7);
        GLfloat blue = luaL_checknumber(L, 8);
        GLfloat alpha = luaL_optnumber(L, 9, 1.0);

        shader_set_gl_color(red, green, blue, alpha);
        glBindTexture(GL_TEXTURE_2D, default_tex);
        break;
    }
    case LUA_TUSERDATA:
    case LUA_TTABLE: {
        // Texture-like object: call its texid() with the object as argument.
        lua_pushliteral(L, "texid");
        lua_gettable(L, 6);
        if (lua_type(L, -1) != LUA_TFUNCTION)
            return luaL_argerror(L, 6, "no texid() function");

        lua_pushvalue(L, 6);
        lua_call(L, 1, 1);
        if (lua_type(L, -1) != LUA_TNUMBER)
            return luaL_argerror(L, 6, "texid() did not return number");

        int texture = lua_tonumber(L, -1);
        lua_pop(L, 1);

        shader_set_gl_color(1.0, 1.0, 1.0, 1.0);
        glBindTexture(GL_TEXTURE_2D, texture);
        break;
    }
    default:
        return luaL_argerror(L, 6, "unsupported value. must be RGBA or texturelike");
    }

    glPushMatrix();
    glTranslatef(pos_x, pos_y, 0);
    glTranslatef(0, size * (SCALE * 0.8), 0);
    // Negative y scale flips the glyphs vertically.
    glScalef(size, -size, 1.0);
    ftglRenderFont(font->font, text, FTGL_RENDER_ALL);
    glPopMatrix();

    lua_pushnumber(L, ftglGetFontAdvance(font->font, text) * size);
    return 1;
}
int helper_normalize_str(const char *src, char *dest, int dest_size) { int type = CTS_LANG_OTHERS; int32_t size; UErrorCode status = 0; UChar tmp_result[CTS_SQL_MAX_LEN*2]; UChar result[CTS_SQL_MAX_LEN*2]; int i = 0; int j = 0; int str_len = strlen(src); int char_len = 0; for (i=0;i<str_len;i+=char_len) { char char_src[10]; char_len = check_utf8(src[i]); memcpy(char_src, &src[i], char_len); char_src[char_len] = '\0'; u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status); h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED, "u_strFromUTF8() Failed(%s)", u_errorName(status)); u_strToLower(tmp_result, array_sizeof(tmp_result), tmp_result, -1, NULL, &status); h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED, "u_strToLower() Failed(%s)", u_errorName(status)); size = unorm_normalize(tmp_result, -1, UNORM_NFD, 0, (UChar *)result, array_sizeof(result), &status); h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED, "unorm_normalize(%s) Failed(%s)", char_src, u_errorName(status)); if (0 == i) type = helper_check_language(result); helper_extra_normalize(result, size); u_strToUTF8(&dest[j], dest_size-j, &size, result, -1, &status); h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED, "u_strToUTF8() Failed(%s)", u_errorName(status)); j += size; dest[j++] = 0x01; } dest[j]='\0'; HELPER_DBG("src(%s) is transformed(%s)", src, dest); return type; }
// Sanitizes a user-supplied string in place.
//
// Returns false, leaving str untouched, when check_utf8() rejects it.
// Otherwise rewrites str byte by byte and returns true:
//   - '\r' is dropped;
//   - every other byte <= 32 except '\n' becomes a space;
//   - the sequences \xe2\x80[\xa8-\xae] (U+2028..U+202E) are dropped;
//   - the sequences \xcc[\xb3\xbf\x8a] (U+0333, U+033F, U+030A) are dropped;
//   - everything else is copied unchanged.
// Processing stops early once the output reaches LENGTH_LIMIT - 3 bytes and
// is_utf8_character_first_code_unit() accepts its last byte; that byte is
// removed from the result.
bool clean_input_string(string &str) {
  constexpr size_t LENGTH_LIMIT = 35000;  // server side limit
  if (!check_utf8(str)) {
    return false;
  }

  const size_t input_size = str.size();
  size_t output_size = 0;
  for (size_t pos = 0; pos < input_size; pos++) {
    const auto c = static_cast<unsigned char>(str[pos]);

    if (c == '\r') {
      // skip
    } else if (c <= 32 && c != '\n') {
      // control characters and the space itself are written as a space
      str[output_size++] = ' ';
    } else {
      // total length of a sequence to drop, 0 if the byte is to be kept
      size_t dropped_length = 0;

      if (c == 0xe2 && pos + 2 < input_size && static_cast<unsigned char>(str[pos + 1]) == 0x80) {
        // remove \xe2\x80[\xa8-\xae]
        const auto third = static_cast<unsigned char>(str[pos + 2]);
        if (0xa8 <= third && third <= 0xae) {
          dropped_length = 3;
        }
      } else if (c == 0xcc && pos + 1 < input_size) {
        // remove vertical lines \xcc[\xb3\xbf\x8a]
        const auto second = static_cast<unsigned char>(str[pos + 1]);
        if (second == 0xb3 || second == 0xbf || second == 0x8a) {
          dropped_length = 2;
        }
      }

      if (dropped_length != 0) {
        // the loop increment consumes the first byte of the sequence
        pos += dropped_length - 1;
      } else {
        str[output_size++] = str[pos];
      }
    }

    // This check runs after every input byte, including dropped ones.
    if (output_size >= LENGTH_LIMIT - 3 && is_utf8_character_first_code_unit(str[output_size - 1])) {
      output_size--;
      break;
    }
  }

  str.resize(output_size);
  return true;
}
/**
 * This function searches for one normalized string inside another. Both
 * strings must have been normalized already.
 * The exact matching behavior depends on localization, so results can
 * differ between languages.
 *
 * Bytes equal to 0x01 are treated as separators and skipped in both
 * strings. Two-byte characters reported by is_diacritical() in the haystack
 * may be skipped when the needle has no diacritical at that position, and
 * three-byte characters reported by is_choseong() in the needle get special
 * handling.
 *
 * @param[in] haystack Base string.
 * @param[in] needle Searching string.
 * @param[out] len Substring length.
 * @return the position of the beginning of the substring (0 or greater),
 *         or -1 when a parameter is NULL or needle is not found.
 * @par example
 * @code
 ret = contacts_svc_normalized_strstr(str1, str2, &len);
 if (0 <= ret) {
    snprintf(first, ret+1, "%s", item_data->display);
    snprintf(middle, len+1, "%s", item_data->display + ret);
    printf("%s -> %s, %s, %s", item_data->display, first, middle, item_data->display + ret + len);
 } else
    printf("str1 does not contain str2");
 * @endcode
 */
API int contacts_svc_normalized_strstr(const char *haystack,
		const char *needle, int *len)
{
	int i, j, wind, h_len, n_len;
	int first_needle_len;
	int equal_index;
	int equal_length;
	int equal_wind = 0;
	bool counted = false;
	size_t haystack_len;
	size_t needle_len;

	retvm_if(NULL == haystack, -1, "The parameter(haystack) is NULL");
	retvm_if(NULL == needle, -1, "The parameter(needle) is NULL");
	retvm_if(NULL == len, -1, "The parameter(len) is NULL");
	CTS_DBG("haystack = %s, needle = %s", haystack, needle);

	/* Neither string is modified, so measure each of them only once. */
	haystack_len = strlen(haystack);
	needle_len = strlen(needle);

	h_len = 1;
	n_len = 1;
	equal_index = 0;
	first_needle_len = check_utf8(needle[0]);

	/*
	 * Outer loop: one match attempt per iteration. "wind" remembers a
	 * position inside the current attempt where the first needle
	 * character was seen again; a failed attempt resumes from there
	 * instead of from the following character.
	 * The index comparisons stay unsigned, as in the strlen()-based form.
	 */
	for (i=0, j=0;(size_t)i<haystack_len;i = wind?wind:(i+h_len)) {
		if (equal_wind) {
			equal_index = equal_wind;
			counted = false;
		}
		wind = 0;
		equal_length = 0;
		equal_wind = 0;

		for (j=0;(size_t)j<needle_len;) {
			bool equal;
			h_len = check_utf8(haystack[i]);

			if (h_len == 1 && haystack[i] == 0x1) {		/* skip separator */
				counted = false;
				i+=h_len;
				continue;
			}

			n_len = check_utf8(needle[j]);
			if (n_len == 1 && needle[j] == 0x1) {		/* skip separator */
				j++;
				continue;
			}

			/* Record a possible restart point for the next attempt. */
			if (wind == 0 && j && 0 < i) {
				if (h_len == first_needle_len && compare_unicode(&haystack[i], needle, first_needle_len)
						&& !is_diacritical(&haystack[i])) {
					if (!counted) {
						wind = i;
						equal_wind = equal_index + equal_length;
					}
				}
			}

			/* Diacritical in haystack without a diacritical in needle: skip it. */
			if ((2 == h_len && is_diacritical(&haystack[i]))
					&& (2 != n_len || !is_diacritical(&needle[j]))) {
				if (j == 0) {
					if (counted)
						equal_index++;
					else {
						equal_index += h_len;
						counted = true;
					}
				}
				else if (!counted) {
					equal_length += h_len;
					counted = true;
				}
				else if (counted)
					equal_length++;
				i+=h_len;
				continue;
			}

			/* Characters of different byte length cannot be equal. */
			if (h_len != n_len) {
				if (!counted) {
					equal_index += (equal_length + h_len);
					counted = true;
				}
				break;
			}

			if (3 == n_len && is_choseong(&needle[j]) && !(is_choseong(&haystack[i]))) {
				/* e.g. do not let "강나" be found when searching by "가나" */
				if (j < (n_len+1) || !is_choseong(&needle[j-n_len-1])) {
					if (!counted) {
						equal_index += (equal_length + h_len);
						counted = true;
					}
					break;
				}
				else {
					if (j == 0) {
						if (!counted) {
							equal_index += h_len;
							counted = true;
						}
					}
					else if (!counted) {
						equal_length += h_len;
						counted = true;
					}
					i+=h_len;
					continue;
				}
			}

			equal = compare_unicode(&haystack[i], &needle[j], n_len);

			if (equal) {
				if (!counted) {
					equal_length += h_len;
					counted = true;
				}
				else if (2 == n_len && is_diacritical(&needle[j]))
					equal_length ++;
				j += n_len;
				i+=h_len;
				continue;
			}
			else {
				if (!counted) {
					equal_index += (equal_length + h_len);
					counted = true;
				}
				else {
					if (2 == n_len && is_diacritical(&needle[j]))
						equal_index += (equal_length + 1);
					else
						equal_index += equal_length;
				}
				break;
			}
		}

		/* The whole needle was consumed: this attempt is a match. */
		if ('\0' == needle[j]) {
			if ('\0' != haystack[i]) {
				h_len = check_utf8(haystack[i]);
				/* Count a diacritical that directly follows the match. */
				if(h_len == 2 && is_diacritical(&haystack[i]))
					equal_length++;
			}
			*len = equal_length;
			return equal_index;
		}
	}

	CTS_DBG("NOT match");
	return -1;
}