예제 #1
0
파일: utf8.c 프로젝트: pombredanne/NetSurf
/**
 * Determine how many bytes the UTF-8 character starting at \a s occupies
 *
 * \param s  Pointer to the first byte of the character
 * \return Number of bytes making up the character
 *
 * The length is obtained from parserutils_charset_utf8_char_byte_length();
 * a failure reported by that call is only caught by an assertion.
 */
size_t utf8_char_byte_length(const char *s)
{
	const uint8_t *bytes = (const uint8_t *) s;
	size_t nbytes;
	parserutils_error status =
			parserutils_charset_utf8_char_byte_length(bytes, &nbytes);

	assert(status == PARSERUTILS_OK);

	return nbytes;
}
예제 #2
0
파일: string.c 프로젝트: dunkelstern/libdom
/**
 * Convert the given string to lowercase
 *
 * \param source      String to convert
 * \param ascii_only  Whether to only convert [A-Z] to [a-z]
 * \param lower       Result pointer for lowercase string.  Caller owns ref
 *
 * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR if the working copy cannot
 *         be allocated or the length of a character in \a source cannot be
 *         determined, otherwise whatever the string constructor returns.
 *
 * \note Right now, will return DOM_NOT_SUPPORTED_ERR if ascii_only is false.
 */
dom_exception
dom_string_tolower(dom_string *source, bool ascii_only, dom_string **lower)
{
	const uint8_t *orig_s = (const uint8_t *) dom_string_data(source);
	const size_t nbytes = dom_string_byte_length(source);
	uint8_t *copy_s;
	size_t index = 0, clen;
	parserutils_error err;
	dom_exception exc;

	if (ascii_only == false)
		return DOM_NOT_SUPPORTED_ERR;

	/* malloc(0) is allowed to return NULL, which would make an empty
	 * source look like an allocation failure; always request at least
	 * one byte so NULL unambiguously means out of memory. */
	copy_s = malloc(nbytes > 0 ? nbytes : 1);
	if (copy_s == NULL)
		return DOM_NO_MEM_ERR;

	/* Skip the copy for empty input so memcpy is never handed a
	 * possibly-NULL source pointer. */
	if (nbytes > 0)
		memcpy(copy_s, orig_s, nbytes);

	/* Walk the source one UTF-8 character at a time so that only
	 * single-byte characters are ever considered for case folding;
	 * bytes belonging to multi-byte sequences are left untouched. */
	while (index < nbytes) {
		err = parserutils_charset_utf8_char_byte_length(orig_s + index,
								&clen);
		if (err != PARSERUTILS_OK) {
			free(copy_s);
			/** \todo Find a better exception */
			return DOM_NO_MEM_ERR;
		}

		if (clen == 1) {
			if (orig_s[index] >= 'A' &&
			    orig_s[index] <= 'Z')
				copy_s[index] += 'a' - 'A';
		}

		index += clen;
	}

	/* The result is built with the same constructor family as the
	 * source: plain for CDATA strings, interned otherwise. */
	if (((dom_string_internal*)source)->type == DOM_STRING_CDATA) {
		exc = dom_string_create(copy_s, nbytes, lower);
	} else {
		exc = dom_string_create_interned(copy_s, nbytes, lower);
	}

	/* The working copy is released regardless of the constructor's
	 * result. */
	free(copy_s);

	return exc;
}
예제 #3
0
파일: string.c 프로젝트: dunkelstern/libdom
/**
 * Get the UCS4 character at position index
 *
 * \param str    The string to read from
 * \param index  The position of the character (counted in characters,
 *               starting at zero)
 * \param ch     The UCS4 character
 * \return DOM_NO_ERR on success, DOM_DOMSTRING_SIZE_ERR if \a index lies
 *         beyond the last character of \a str, or (uint32_t) -1 if a
 *         character could not be measured or decoded.
 */
dom_exception dom_string_at(dom_string *str, uint32_t index,
		uint32_t *ch)
{
	const uint8_t *s;
	size_t clen, slen;
	uint32_t c, pos;
	parserutils_error err;

	s = (const uint8_t *) dom_string_data(str);
	slen = dom_string_byte_length(str);

	/* pos is the zero-based index of the character s currently points
	 * at.  Comparing pos against index directly (rather than a running
	 * count against index + 1) avoids wrap-around for index == UINT32_MAX. */
	pos = 0;

	while (slen > 0) {
		err = parserutils_charset_utf8_char_byte_length(s, &clen);
		if (err != PARSERUTILS_OK) {
			/* NOTE(review): not a named dom_exception; value
			 * preserved so existing callers see no change. */
			return (uint32_t) -1;
		}

		if (pos == index) {
			err = parserutils_charset_utf8_to_ucs4(s, slen,
					&c, &clen);
			if (err != PARSERUTILS_OK) {
				return (uint32_t) -1;
			}

			*ch = c;
			return DOM_NO_ERR;
		}

		/* A trailing sequence whose lead byte claims more bytes than
		 * remain would make slen wrap below zero and send the loop
		 * past the end of the buffer; stop scanning instead. */
		if (clen > slen)
			break;

		s += clen;
		slen -= clen;
		pos++;
	}

	/* Ran out of characters before reaching the requested position */
	return DOM_DOMSTRING_SIZE_ERR;
}