Example #1
0
/**
 * Get the index of the last occurrence of a character in a dom string 
 * 
 * \param str  The string to search in
 * \param chr  UCS4 value to look for
 * \return Character index of found character, or -1 if none found
 */
off_t dom_string_rindex(dom_string *str, uint32_t chr)
{
	const uint8_t *s;
	size_t clen = 0, slen;
	uint32_t c = 0;
    off_t coff, index;
	parserutils_error err;

	s = (const uint8_t *) dom_string_data(str);
	slen = dom_string_byte_length(str);

	index = dom_string_length(str);

	while (slen > 0) {
		err = parserutils_charset_utf8_prev(s, slen, 
				(off_t *) &coff);
		if (err == PARSERUTILS_OK) {
			err = parserutils_charset_utf8_to_ucs4(s + coff, 
					slen - clen, &c, &clen);
		}

		if (err != PARSERUTILS_OK) {
			return (uint32_t) -1;
		}

		if (c == chr) {
			return index;
		}

		slen -= clen;
		index--;
	}

	return (uint32_t) -1;
}
Example #2
0
/**
 * Get the index of the first occurrence of a character in a dom string 
 * 
 * \param str  The string to search in
 * \param chr  UCS4 value to look for
 * \return Character index of found character, or -1 if none found 
 */
off_t dom_string_index(dom_string *str, uint32_t chr)
{
	const uint8_t *s;
	size_t clen, slen;
	uint32_t c, index;
	parserutils_error err;

	s = (const uint8_t *) dom_string_data(str);
	slen = dom_string_byte_length(str);

	index = 0;

	while (slen > 0) {
		err = parserutils_charset_utf8_to_ucs4(s, slen, &c, &clen);
		if (err != PARSERUTILS_OK) {
			return (uint32_t) -1;
		}

		if (c == chr) {
			return index;
		}

		s += clen;
		slen -= clen;
		index++;
	}

	return (uint32_t) -1;
}
Example #3
0
/**
 * Validate whether the string is a legal NCName.
 * Refer http://www.w3.org/TR/REC-xml-names/ for detail.
 *
 * \param str  The name to validate
 * \return true if ::name is valid, false otherwise.
 */
bool _dom_validate_ncname(dom_string *name)
{
	uint32_t ch;
	size_t clen, slen;
	parserutils_error err;
	const uint8_t *s;

	if (name == NULL)
		return false;

	slen = dom_string_length(name);
	if (slen == 0)
		return false;

	s = (const uint8_t *) dom_string_data(name);
	slen = dom_string_byte_length(name);
	
	err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen);
	if (err != PARSERUTILS_OK) {
		return false;
	}
	
	if (is_letter(ch) == false && ch != (uint32_t) '_')
		return false;
	
	s += clen;
	slen -= clen;
	
	while (slen > 0) {
		err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen);
		if (err != PARSERUTILS_OK) {
			return false;
		}

		if (is_name_char(ch) == false)
			return false;

		if (ch == (uint32_t) ':')
			return false;

		s += clen;
		slen -= clen;
	}

	return true;
}
Example #4
0
/**
 * Convert a UTF-8 multibyte sequence into a single UCS4 character
 *
 * Encoding of UCS values outside the UTF-16 plane has been removed from
 * RFC3629. This function conforms to RFC2279, however.
 *
 * \param s_in  The sequence to process
 * \param l  Length of sequence
 * \return   UCS4 character
 */
uint32_t utf8_to_ucs4(const char *s_in, size_t l)
{
	uint32_t ucs4;
	size_t len;
	parserutils_error perror;

	perror = parserutils_charset_utf8_to_ucs4((const uint8_t *) s_in, l, 
			&ucs4, &len);
	if (perror != PARSERUTILS_OK)
		ucs4 = 0xfffd;

	return ucs4;
}
Example #5
0
/**
 * Get the UCS4 character at position index
 *
 * \param index  The position of the charater
 * \param ch     The UCS4 character
 * \return DOM_NO_ERR on success, appropriate dom_exception on failure.
 */
dom_exception dom_string_at(dom_string *str, uint32_t index, 
		uint32_t *ch)
{
	const uint8_t *s;
	size_t clen, slen;
	uint32_t c, i;
	parserutils_error err;

	s = (const uint8_t *) dom_string_data(str);
	slen = dom_string_byte_length(str);

	i = 0;

	while (slen > 0) {
		err = parserutils_charset_utf8_char_byte_length(s, &clen);
		if (err != PARSERUTILS_OK) {
			return (uint32_t) -1;
		}

		i++;
		if (i == index + 1)
			break;

		s += clen;
		slen -= clen;
	}

	if (i == index + 1) {
		err = parserutils_charset_utf8_to_ucs4(s, slen, &c, &clen);
		if (err != PARSERUTILS_OK) {
			return (uint32_t) -1;
		}

		*ch = c;
		return DOM_NO_ERR;
	} else {
		return DOM_DOMSTRING_SIZE_ERR;
	}
}