C++ (Cpp) check_utf8 примеры использования

Пример #1

0

Показать файл

Файл: tag_id3.c Проект: OpenInkpot-archive/iplinux-mpd

/**
 * Convert an ID3 string (UCS-4) into a newly allocated UTF-8 string with
 * leading and trailing whitespace removed.
 *
 * The string is first narrowed to Latin-1.  If those bytes already pass
 * check_utf8(), the plain UCS-4 -> UTF-8 conversion is used.  Otherwise,
 * when an encoding is configured under CONF_ID3V1_ENCODING, the Latin-1
 * bytes are re-imported with import_8bit_string():
 *   - unconditionally for ID3v1 tags;
 *   - for other tags only if the plain UTF-8 conversion fails check_utf8()
 *     and the re-import succeeds.
 *
 * @param is_id3v1 true if the string comes from an ID3v1 tag
 * @param ucs4     the UCS-4 string to convert
 * @return a string created by g_strdup(), or NULL if a conversion step
 *         returned NULL
 *
 * NOTE(review): isostr and utf8 are produced by libid3tag /
 * import_8bit_string() but released with g_free() -- confirm that the
 * allocators match.
 */
static id3_utf8_t *
import_id3_string(bool is_id3v1, const id3_ucs4_t *ucs4)
{
    id3_utf8_t *utf8, *utf8_stripped = NULL;
    id3_latin1_t *isostr;
    const char *encoding;
    bool is_utf8;

    encoding = config_get_string(CONF_ID3V1_ENCODING, NULL);
    isostr = id3_ucs4_latin1duplicate(ucs4);
    if (G_UNLIKELY(!isostr)) {
        return NULL;
    }
    is_utf8 = check_utf8((const unsigned char *)isostr,
        strlen((const char *)isostr));

    if (is_utf8) {
        utf8 = id3_ucs4_utf8duplicate(ucs4);
        goto done;
    }

    /* use encoding field here? */
    if (is_id3v1 && encoding) {
        utf8 = import_8bit_string(isostr, encoding);
    } else {
        utf8 = id3_ucs4_utf8duplicate(ucs4);
        /* The duplicate may be NULL (the code after `done` already allows
         * for that), so it must not reach strlen() unchecked. */
        if (utf8 != NULL
            && !check_utf8((unsigned char *)utf8, strlen((const char *)utf8))
            && encoding)
        {
            id3_utf8_t *tmp = import_8bit_string(isostr, encoding);
            if (tmp) {
                g_free(utf8);
                utf8 = tmp;
            }
        }
    }

done:
    /* isostr is known to be non-NULL here; g_free() accepts NULL anyway. */
    g_free(isostr);
    if (utf8 == NULL)
        return NULL;

    /* g_strstrip() trims in place; the result is then copied so the
     * returned buffer starts at the first kept character. */
    utf8_stripped = (id3_utf8_t *)g_strdup(g_strstrip((gchar *)utf8));
    g_free(utf8);
    return utf8_stripped;
}

Пример #2

0

Показать файл

Файл: cts-normalize.c Проект: tizenorg/platform.core.pim.contacts-service

/*
 * Copies src into dest one character at a time, using check_utf8() on the
 * lead byte to obtain the character's length (presumably its byte count;
 * check_utf8() is defined elsewhere).
 *
 * Copying stops with CTS_ERR_ARG_INVALID as soon as a character
 *   - has a non-positive length,
 *   - would not leave room for the terminator in dest, or
 *   - would extend past the end of src.
 * dest is NUL-terminated on every path.
 *
 * Returns the number of bytes copied, 0 when src is NULL, or
 * CTS_ERR_ARG_INVALID as described above.
 */
static int cts_remove_special_char(const char *src, char *dest, int dest_size)
{
	int in = 0;
	int out = 0;
	int total;
	int width;

	if (NULL == src) {
		ERR("The parameter(src) is NULL");
		dest[out] = '\0';
		return 0;
	}

	total = strlen(src);

	for (;;) {
		if (src[in] == 0)
			break;

		width = check_utf8(src[in]);

		/* Reject first, then copy: keeps the happy path unindented. */
		if (width <= 0 || width >= dest_size - out || width > total - in) {
			ERR("The parameter(src:%s) has invalid character set", src);
			dest[out] = '\0';
			return CTS_ERR_ARG_INVALID;
		}

		memcpy(dest + out, src + in, width);
		out += width;
		in += width;
	}

	dest[out] = '\0';
	return out;
}

Пример #3

0

Показать файл

Файл: cts-normalize.c Проект: tizenorg/platform.core.pim.contacts-service

/*
 * Copies a phone number from src to dest while filtering it.
 *
 *  - A leading '+' is copied as is.
 *  - Bytes whose check_utf8() length is <= 1 are copied unless
 *    check_dirty_number() flags them, in which case they are dropped.
 *  - Longer (multi-byte) characters are skipped entirely.
 *
 * Copying stops once d_pos reaches dest_size-2.  dest is always
 * NUL-terminated when it can hold at least one byte.
 *
 * A NULL src is logged and yields an empty string.
 *
 * Returns the number of bytes stored in dest (excluding the terminator),
 * or 0 when dest is NULL or dest_size is not positive.
 */
int cts_clean_number(const char *src, char *dest, int dest_size)
{
	int s_pos=0, d_pos=0, char_type, skipped;

	/* Without room for even the terminator nothing may be written. */
	if (NULL == dest || dest_size <= 0)
		return 0;

	if (NULL == src)
		ERR("The parameter(src) is NULL");
	else
	{
		/* Keep the '+' only if the terminator still fits after it. */
		if ('+' == src[s_pos] && d_pos < dest_size-1)
			dest[d_pos++] = src[s_pos++];

		while (src[s_pos] != 0)
		{
			if (d_pos >= dest_size-2) break;
			char_type = check_utf8(src[s_pos]);
			if (char_type <= 1) {
				if (check_dirty_number(src[s_pos])) {
					s_pos++;
					continue;
				}
				dest[d_pos++] = src[s_pos++];
			}
			else {
				/* Skip the multi-byte character byte by byte so that a
				 * sequence truncated by the end of the string cannot
				 * move s_pos past the NUL terminator. */
				for (skipped = 0; skipped < char_type && src[s_pos] != 0; skipped++)
					s_pos++;
			}
		}
	}

	dest[d_pos] = 0;
	return d_pos;
}

Пример #4

0

Показать файл

Файл: font.c Проект: dividuum/info-beamer

/*
 * Lua C function that renders a text string with a font.
 *
 * Stack arguments:
 *   1     font object (validated by checked_font)
 *   2, 3  x and y position
 *   4     text; rejected with "invalid utf8" if check_utf8() fails
 *   5     size (divided by SCALE before use)
 *   6..   either numbers r, g, b and optional a (default 1.0), or a
 *         userdata/table providing a texid() function that returns a number
 *
 * Pushes one number: ftglGetFontAdvance() for the text multiplied by the
 * scaled size.
 */
static int font_write(lua_State *L) {
    font_t *self = checked_font(L, 1);
    const GLfloat pos_x = luaL_checknumber(L, 2);
    const GLfloat pos_y = luaL_checknumber(L, 3);
    const char *utf8_text = luaL_checkstring(L, 4);

    // FTGL must never be handed malformed UTF-8
    if (!check_utf8(utf8_text))
        return luaL_error(L, "invalid utf8");

    const GLfloat scaled = luaL_checknumber(L, 5) / SCALE;

    switch (lua_type(L, 6)) {
    case LUA_TNUMBER: {
        // Plain color: tint the default texture
        const GLfloat red = luaL_checknumber(L, 6);
        const GLfloat green = luaL_checknumber(L, 7);
        const GLfloat blue = luaL_checknumber(L, 8);
        const GLfloat alpha = luaL_optnumber(L, 9, 1.0);

        shader_set_gl_color(red, green, blue, alpha);
        glBindTexture(GL_TEXTURE_2D, default_tex);
        break;
    }
    case LUA_TUSERDATA:
    case LUA_TTABLE: {
        // Texture-like object: ask it for its texture id via obj:texid()
        lua_pushliteral(L, "texid");
        lua_gettable(L, 6);
        if (lua_type(L, -1) != LUA_TFUNCTION)
            return luaL_argerror(L, 6, "no texid() function");
        lua_pushvalue(L, 6);
        lua_call(L, 1, 1);
        if (lua_type(L, -1) != LUA_TNUMBER)
            return luaL_argerror(L, 6, "texid() did not return number");
        int texture = lua_tonumber(L, -1);
        lua_pop(L, 1);

        shader_set_gl_color(1.0, 1.0, 1.0, 1.0);
        glBindTexture(GL_TEXTURE_2D, texture);
        break;
    }
    default:
        return luaL_argerror(L, 6, "unsupported value. must be RGBA or texturelike");
    }

    glPushMatrix();
        glTranslatef(pos_x, pos_y, 0);
        glTranslatef(0, scaled * (SCALE * 0.8), 0);
        glScalef(scaled, -scaled, 1.0);
        ftglRenderFont(self->font, utf8_text, FTGL_RENDER_ALL);
    glPopMatrix();

    lua_pushnumber(L, ftglGetFontAdvance(self->font, utf8_text) * scaled);
    return 1;
}

Пример #5

0

Показать файл

Файл: normalize.c Проект: tizenorg/platform.core.pim.contacts-service

/*
 * Normalizes src character by character and writes the result to dest.
 *
 * For every character of src (length taken from check_utf8() on its lead
 * byte) the function converts it to UTF-16, lower-cases it, applies NFD
 * normalization and helper_extra_normalize(), converts it back to UTF-8
 * and appends it to dest followed by a 0x01 byte.  dest is NUL-terminated
 * at the end.
 *
 * The language type is determined by helper_check_language() on the first
 * normalized character; it stays CTS_LANG_OTHERS for an empty src.
 *
 * Returns the language type on success.  CTS_ERR_ICU_FAILED is returned
 * when an ICU call fails and -- being the only error code this function
 * uses -- also for NULL/empty buffers, for a character length that is not
 * usable, and when dest is too small for the result.
 */
int helper_normalize_str(const char *src, char *dest, int dest_size)
{
	int type = CTS_LANG_OTHERS;
	int32_t size;
	UErrorCode status = 0;
	UChar tmp_result[CTS_SQL_MAX_LEN*2];
	UChar result[CTS_SQL_MAX_LEN*2];
	int i = 0;
	int j = 0;
	int str_len;
	int char_len = 0;

	/* strlen(NULL) and writes into a zero-sized dest must not happen. */
	h_retvm_if(NULL == src || NULL == dest || dest_size <= 0, CTS_ERR_ICU_FAILED,
			"Invalid parameter(dest_size:%d)", dest_size);

	str_len = strlen(src);

	for (i=0;i<str_len;i+=char_len) {
		char char_src[10];
		char_len = check_utf8(src[i]);

		/* char_len is used as a memcpy length and as the loop step, so it
		 * has to be positive, fit char_src with its terminator, and stay
		 * inside src. */
		h_retvm_if(char_len <= 0 || (int)sizeof(char_src) <= char_len
				|| str_len - i < char_len, CTS_ERR_ICU_FAILED,
				"Invalid UTF-8 sequence at offset %d", i);

		memcpy(char_src, &src[i], char_len);
		char_src[char_len] = '\0';

		u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status);
		h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED,
				"u_strFromUTF8() Failed(%s)", u_errorName(status));

		u_strToLower(tmp_result, array_sizeof(tmp_result), tmp_result, -1, NULL, &status);
		h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED,
				"u_strToLower() Failed(%s)", u_errorName(status));

		size = unorm_normalize(tmp_result, -1, UNORM_NFD, 0,
				(UChar *)result, array_sizeof(result), &status);
		h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED,
				"unorm_normalize(%s) Failed(%s)", char_src, u_errorName(status));

		if (0 == i)
			type = helper_check_language(result);
		helper_extra_normalize(result, size);

		u_strToUTF8(&dest[j], dest_size-j, &size, result, -1, &status);
		h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED,
				"u_strToUTF8() Failed(%s)", u_errorName(status));

		/* Besides the converted bytes, the 0x01 separator and the final
		 * NUL terminator must fit as well. */
		h_retvm_if(dest_size - j - 2 < size, CTS_ERR_ICU_FAILED,
				"dest is too small(dest_size:%d)", dest_size);

		j += size;
		dest[j++] = 0x01;
	}
	dest[j]='\0';
	HELPER_DBG("src(%s) is transformed(%s)", src, dest);
	return type;
}

Пример #6

0

Показать файл

Файл: misc.cpp Проект: anhdocphys/td

// Cleans a user-supplied string in place.
//
// Returns false, leaving str untouched, when check_utf8() rejects it.
// Otherwise rewrites str as follows and returns true:
//   * '\r' is dropped;
//   * every other byte <= 32 except '\n' becomes a space;
//   * the sequences \xe2\x80[\xa8-\xae] are removed;
//   * the "vertical line" sequences \xcc[\xb3\xbf\x8a] are removed;
//   * all other bytes are kept.
// Processing stops early once the output gets within 3 bytes of LENGTH_LIMIT
// and the last written byte starts a UTF-8 character; that byte is dropped.
bool clean_input_string(string &str) {
  constexpr size_t LENGTH_LIMIT = 35000;  // server side limit
  if (!check_utf8(str)) {
    return false;
  }

  auto byte_at = [&str](size_t index) { return static_cast<unsigned char>(str[index]); };

  const size_t input_size = str.size();
  size_t out = 0;  // write cursor; never runs ahead of the read cursor
  for (size_t in = 0; in < input_size; in++) {
    const unsigned char byte = byte_at(in);

    if (byte == '\r') {
      // carriage returns are skipped
    } else if (byte <= 32 && byte != '\n') {
      // control characters and the space itself collapse to a plain space
      str[out++] = ' ';
    } else if (byte == 0xe2 && in + 2 < input_size && byte_at(in + 1) == 0x80 && 0xa8 <= byte_at(in + 2) &&
               byte_at(in + 2) <= 0xae) {
      // remove \xe2\x80[\xa8-\xae]
      in += 2;
    } else if (byte == 0xcc && in + 1 < input_size &&
               (byte_at(in + 1) == 0xb3 || byte_at(in + 1) == 0xbf || byte_at(in + 1) == 0x8a)) {
      // remove vertical lines \xcc[\xb3\xbf\x8a]
      in++;
    } else {
      str[out++] = str[in];
    }

    if (out >= LENGTH_LIMIT - 3 && is_utf8_character_first_code_unit(str[out - 1])) {
      out--;
      break;
    }
  }

  str.resize(out);
  return true;
}

Пример #7

0

Показать файл

Файл: cts-normalize.c Проект: tizenorg/platform.core.pim.contacts-service

/**
 * Searches for needle inside haystack. Both strings must have been
 * normalized already. \n
 * The matching rules depend on localization (diacritical marks and Hangul
 * initial consonants are handled specially below), so the behavior can
 * differ between languages.
 *
 * @param[in] haystack Base string.
 * @param[in] needle String to search for.
 * @param[out] len Length of the matched substring; written only when a match is found.
 * @return Position of the beginning of the matched substring, or -1 when
 * needle is not found. -1 is also passed to retvm_if() for a NULL
 * haystack or needle (presumably returned by that macro).
 * @par example
 * @code
 	ret = contacts_svc_normalized_strstr(str1, str2, &len);
 	if(0 <= ret) {
		snprintf(first, ret+1, "%s", item_data->display);
		snprintf(middle, len+1, "%s", item_data->display + ret);
		printf("%s -> %s, %s, %s", item_data->display, first, middle, item_data->display + ret + len);
 	} else
 		printf("str1 doesn't has str2");
 * @endcode
 */
API int contacts_svc_normalized_strstr(const char *haystack,
		const char *needle, int *len)
{
	// i / j      : read offsets into haystack / needle
	// h_len/n_len: check_utf8() result for the current haystack / needle character
	// wind       : haystack offset at which the outer loop resumes (0 = none recorded)
	int i, j, wind, h_len, n_len;
	int first_needle_len;
	// equal_index : value returned as the match position
	// equal_length: value stored in *len
	// equal_wind  : equal_index to use when the search resumes at wind
	int equal_index;
	int equal_length;
	int equal_wind = 0;
	// counted: set once the current character has been added to
	// equal_index/equal_length; cleared again at every 0x1 separator
	bool counted = false;
	retvm_if(NULL == haystack, -1, "The parameter(haystack) is NULL");
	retvm_if(NULL == needle, -1, "The parameter(needle) is NULL");
	CTS_DBG("haystack = %s, needle = %s", haystack, needle);

	h_len = 1;
	n_len = 1;
	equal_index = 0;
	first_needle_len = check_utf8(needle[0]);
	// Each outer iteration is one match attempt. The next attempt starts at
	// wind if one was recorded, otherwise one character after i.
	// NOTE(review): strlen() is re-evaluated in both loop conditions.
	for (i=0, j=0;i<strlen(haystack);i = wind?wind:(i+h_len)) {
		if (equal_wind) {
			equal_index = equal_wind;
			counted = false;
		}
		wind = 0;
		equal_length = 0;
		equal_wind = 0;
		// Walk needle and haystack in parallel; j advances only on a match
		// or when a separator is skipped in the needle.
		for (j=0;j<strlen(needle);) {
			bool equal;
			h_len = check_utf8(haystack[i]);

			if (h_len == 1 && haystack[i] == 0x1) {		// skip separator
				counted = false;
				i+=h_len;
				continue;
			}

			n_len = check_utf8(needle[j]);
			if (n_len == 1 && needle[j] == 0x1) {		// skip separator
				j++;
				continue;
			}

			// While a match is in progress, remember the first later
			// haystack position that looks like the start of the needle, so
			// a failed attempt can resume there.
			if (wind == 0 && j && 0 < i) {
				if (h_len == first_needle_len && compare_unicode(&haystack[i], needle, first_needle_len)
						&& !is_diacritical(&haystack[i])) {
					// NOTE(review): tmp is assigned but never read in this function.
					unsigned short tmp;

					tmp = (haystack[i+1] << 8) | haystack[i+2];
					if (!counted) {
						wind = i;
						equal_wind = equal_index + equal_length;
					}
				}
			}

			// A diacritical mark in the haystack that the needle does not
			// have at this point is stepped over without consuming the needle.
			if ((2 == h_len && is_diacritical(&haystack[i]))
					&& (2 != n_len || !is_diacritical(&needle[j]))) {
				if (j == 0) {
					if (counted)
						equal_index++;
					else {
						equal_index += h_len;
						counted = true;
					}
				}
				else if (!counted) {
					equal_length += h_len;
					counted = true;
				}
				else if (counted)
					equal_length++;
				i+=h_len;
				continue;
			}

			// Characters of different encoded length cannot be equal: this
			// attempt fails.
			if (h_len != n_len) {
				if (!counted) {
					equal_index += (equal_length + h_len);
					counted = true;
				}
				break;
			}

			// Needle has a choseong (per is_choseong) where the haystack does not.
			if (3 == n_len && is_choseong(&needle[j]) && !(is_choseong(&haystack[i]))) {
				if (j < (n_len+1) || !is_choseong(&needle[j-n_len-1])) {		// original note: skip "gangna" when searching by "gana"
					if (!counted) {
						equal_index += (equal_length + h_len);
						counted = true;
					}
					break;
				}
				else {
					if (j == 0) {
						if (!counted) {
							equal_index += h_len;
							counted = true;
						}
					}
					else if (!counted) {
						equal_length += h_len;
						counted = true;
					}
					i+=h_len;
					continue;
				}
			}

			equal = compare_unicode(&haystack[i], &needle[j], n_len);

			if (equal) {
				// Matching character: extend the match and advance both strings.
				if (!counted) {
					equal_length += h_len;
					counted = true;
				}
				else if (2 == n_len && is_diacritical(&needle[j]))
					equal_length ++;
				j += n_len;
				i+=h_len;
				continue;
			}
			else {
				// Mismatch: fold what was matched so far into equal_index and
				// give up on this attempt.
				if (!counted) {
					equal_index += (equal_length + h_len);
					counted = true;
				}
				else {
					if (2 == n_len && is_diacritical(&needle[j]))
						equal_index += (equal_length + 1);
					else
						equal_index += equal_length;
				}
				break;
			}
		}

		// The whole needle was consumed: report the match.
		if ('\0' == needle[j]) {
			if ('\0' != haystack[i]) {
				// A diacritical mark directly after the match is added to its length.
				h_len = check_utf8(haystack[i]);
				if(h_len == 2 && is_diacritical(&haystack[i]))
					equal_length++;
			}
			*len = equal_length;
			return equal_index;
		}
	}

	CTS_DBG("NOT match");
	return -1;
}