Example #1
0
/**
 * Get the index of the last occurrence of a character in a dom string 
 * 
 * \param str  The string to search in
 * \param chr  UCS4 value to look for
 * \return Character index of found character, or -1 if none found
 */
off_t dom_string_rindex(dom_string *str, uint32_t chr)
{
	const uint8_t *s;
	size_t clen = 0, slen;
	uint32_t c = 0;
    off_t coff, index;
	parserutils_error err;

	s = (const uint8_t *) dom_string_data(str);
	slen = dom_string_byte_length(str);

	index = dom_string_length(str);

	while (slen > 0) {
		err = parserutils_charset_utf8_prev(s, slen, 
				(off_t *) &coff);
		if (err == PARSERUTILS_OK) {
			err = parserutils_charset_utf8_to_ucs4(s + coff, 
					slen - clen, &c, &clen);
		}

		if (err != PARSERUTILS_OK) {
			return (uint32_t) -1;
		}

		if (c == chr) {
			return index;
		}

		slen -= clen;
		index--;
	}

	return (uint32_t) -1;
}
Example #2
0
static duk_ret_t dukky_html_anchor_element_target_getter(duk_context *ctx)
{
	/* Get private data for method */
	html_anchor_element_private_t *priv = NULL;
	duk_push_this(ctx);
	duk_get_prop_string(ctx, -1, dukky_magic_string_private);
	priv = duk_get_pointer(ctx, -1);
	duk_pop_2(ctx);
	if (priv == NULL) {
		return 0; /* can do? No can do. */
	}

#line 34 "HTMLAnchorElement.bnd"
	dom_exception exc;
	dom_string *str;

	exc = dom_html_anchor_element_get_target((struct dom_html_anchor_element *)((node_private_t*)priv)->node, &str);
	if (exc != DOM_NO_ERR) {
		return 0;
	}

	if (str != NULL) {
		duk_push_lstring(ctx,
			dom_string_data(str),
			dom_string_length(str));
		dom_string_unref(str);
	} else {
		duk_push_lstring(ctx, NULL, 0);
	}

	return 1;
}
Example #3
0
/**
 * Validate whether the string is a legal NCName.
 * Refer http://www.w3.org/TR/REC-xml-names/ for detail.
 *
 * \param str  The name to validate
 * \return true if ::name is valid, false otherwise.
 */
bool _dom_validate_ncname(dom_string *name)
{
	uint32_t ch;
	size_t clen, slen;
	parserutils_error err;
	const uint8_t *s;

	if (name == NULL)
		return false;

	slen = dom_string_length(name);
	if (slen == 0)
		return false;

	s = (const uint8_t *) dom_string_data(name);
	slen = dom_string_byte_length(name);
	
	err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen);
	if (err != PARSERUTILS_OK) {
		return false;
	}
	
	if (is_letter(ch) == false && ch != (uint32_t) '_')
		return false;
	
	s += clen;
	slen -= clen;
	
	while (slen > 0) {
		err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen);
		if (err != PARSERUTILS_OK) {
			return false;
		}

		if (is_name_char(ch) == false)
			return false;

		if (ch == (uint32_t) ':')
			return false;

		s += clen;
		slen -= clen;
	}

	return true;
}
Example #4
0
/**
 * Insert data into a dom string at the given location
 *
 * \param target  Pointer to string to insert into
 * \param source  Pointer to string to insert
 * \param offset  Character offset of location to insert at
 * \param result  Pointer to location to receive result
 * \return DOM_NO_ERR          on success, 
 *         DOM_NO_MEM_ERR      on memory exhaustion,
 *         DOM_INDEX_SIZE_ERR  if ::offset > len(::target).
 *
 * The returned string will have its reference count increased. The client
 * should dereference it once it has finished with it. 
 */
dom_exception dom_string_insert(dom_string *target,
		dom_string *source, off_t offset,
		dom_string **result)
{
	dom_string_internal *res;
	const uint8_t *t, *s;
	size_t tlen, slen, clen;
	off_t ins = 0;
	parserutils_error err;

	/* target string is NULL equivalent to empty. */
	if (target == NULL)
		target = (dom_string *)&empty_string;

	t = (const uint8_t *) dom_string_data(target);
	tlen = dom_string_byte_length(target);
	s = (const uint8_t *) dom_string_data(source);
	slen = dom_string_byte_length(source);

	clen = dom_string_length(target);

	if (offset > clen)
		return DOM_INDEX_SIZE_ERR;

	/* Calculate the byte index of the insertion point */
	if (offset == clen) {
		/* Optimisation for append */
		ins = tlen;
	} else {
		while (offset > 0) {
			err = parserutils_charset_utf8_next(t, tlen, 
					ins, &ins);

			if (err != PARSERUTILS_OK) {
				return DOM_NO_MEM_ERR;
			}

			offset--;
		}
	}

	/* Allocate result string */
	res = malloc(sizeof(*res));
	if (res == NULL) {
		return DOM_NO_MEM_ERR;
	}

	/* Allocate data buffer for result contents */
	res->data.cdata.ptr = malloc(tlen + slen + 1);
	if (res->data.cdata.ptr == NULL) {
		free(res);
		return DOM_NO_MEM_ERR;
	}

	/* Copy initial portion of target, if any, into result */
	if (ins > 0) {
		memcpy(res->data.cdata.ptr, t, (unsigned long)ins);
	}

	/* Copy inserted data into result */
	memcpy(res->data.cdata.ptr + ins, s, slen);

	/* Copy remainder of target, if any, into result */
	if (tlen - ins > 0) {
		memcpy(res->data.cdata.ptr + ins + slen, t + ins, (unsigned long)(tlen - ins));
	}

	res->data.cdata.ptr[tlen + slen] = '\0';

	res->data.cdata.len = tlen + slen;

	res->base.refcnt = 1;

	res->type = DOM_STRING_CDATA;

	*result = (dom_string *)res;

	return DOM_NO_ERR;
}
static bool save_complete_handle_element(save_complete_ctx *ctx,
		dom_node *node, save_complete_event_type event_type)
{
	dom_string *name;
	dom_namednodemap *attrs;
	const char *name_data;
	size_t name_len;
	bool process = true;
	dom_exception error;

	ctx->iter_state = STATE_NORMAL;

	error = dom_node_get_node_name(node, &name);
	if (error != DOM_NO_ERR)
		return false;

	if (name == NULL)
		return true;

	name_data = dom_string_data(name);
	name_len = dom_string_byte_length(name);

	if (name_len == SLEN("base") &&
			strncasecmp(name_data, "base", name_len) == 0) {
		/* Elide BASE elements from the output */
		process = false;
	} else if (name_len == SLEN("meta") && 
			strncasecmp(name_data, "meta", name_len) == 0) {
		/* Don't emit close tags for META elements */
		if (event_type == EVENT_LEAVE) {
			process = false;
		} else {
			/* Elide meta charsets */
			dom_string *value;
			error = dom_element_get_attribute(node,
					corestring_dom_http_equiv, &value);
			if (error != DOM_NO_ERR) {
				dom_string_unref(name);
				return false;
			}

			if (value != NULL) {
				if (dom_string_length(value) ==
					SLEN("Content-Type") &&
					strncasecmp(dom_string_data(value),
						"Content-Type",
						SLEN("Content-Type")) == 0)
					process = false;

				dom_string_unref(value);
			} else {
				bool yes;

				error = dom_element_has_attribute(node,
						corestring_dom_charset, &yes);
				if (error != DOM_NO_ERR) {
					dom_string_unref(name);
					return false;
				}

				if (yes)
					process = false;
			}
		}
	} else if (event_type == EVENT_LEAVE && 
			((name_len == SLEN("link") && 
			strncasecmp(name_data, "link", name_len) == 0))) {
		/* Don't emit close tags for void elements */
		process = false;
	}

	if (process == false) {
		dom_string_unref(name);
		return true;
	}

	fputc('<', ctx->fp);
	if (event_type == EVENT_LEAVE)
		fputc('/', ctx->fp);
	fwrite(name_data, sizeof(*name_data), name_len, ctx->fp);

	if (event_type == EVENT_ENTER) {
		error = dom_node_get_attributes(node, &attrs);
		if (error != DOM_NO_ERR) {
			dom_string_unref(name);
			return false;
		}

		if (save_complete_handle_attrs(ctx, name, attrs) == false) {
			dom_namednodemap_unref(attrs);
			dom_string_unref(name);
			return false;
		}

		dom_namednodemap_unref(attrs);
	}

	fputc('>', ctx->fp);

	/* Rewrite contents of style elements */
	if (event_type == EVENT_ENTER && name_len == SLEN("style") &&
			strncasecmp(name_data, "style", name_len) == 0) {
		dom_string *content;

		error = dom_node_get_text_content(node, &content);
		if (error != DOM_NO_ERR) {
			dom_string_unref(name);
			return false;
		}

		if (content != NULL) {
			char *rewritten;
			unsigned long len;

			/* Rewrite @import rules */
			rewritten = save_complete_rewrite_stylesheet_urls(
					ctx,
					dom_string_data(content),
					dom_string_byte_length(content),
					ctx->base,
					&len);
			if (rewritten == NULL) {
				dom_string_unref(content);
				dom_string_unref(name);
				return false;
			}

			dom_string_unref(content);

			fwrite(rewritten, sizeof(*rewritten), len, ctx->fp);

			free(rewritten);
		}

		ctx->iter_state = STATE_IN_STYLE;
	} else if (event_type == EVENT_ENTER && name_len == SLEN("head") &&
			strncasecmp(name_data, "head", name_len) == 0) {
		/* If this is a HEAD element, insert a meta charset */
		fputs("<META http-equiv=\"Content-Type\" "
				"content=\"text/html; charset=utf-8\">",
				ctx->fp);
	}

	dom_string_unref(name);

	return true;
}