vep_do_remove(struct vep_state *vep, enum dowhat what)
{
	/*
	 * Tag handler for <esi:remove> and </esi:remove>.
	 *
	 * Only invoked for DO_TAG (asserted).  The empty-element form is
	 * rejected; an opening tag requires that no remove section is
	 * active and a closing tag requires that one is.  Any violation
	 * is reported through vep_error() and vep->remove is left alone.
	 */
	Debug("DO_REMOVE(%d, end %d empty %d remove %d)\n",
	    what, vep->endtag, vep->emptytag, vep->remove);
	assert(what == DO_TAG);

	if (vep->emptytag) {
		vep_error(vep, "ESI 1.0 <esi:remove/> not legal");
	} else if (vep->endtag) {
		/* </esi:remove>: legal only while a section is open */
		if (vep->remove)
			vep->remove = 0;
		else
			vep_error(vep, "ESI 1.0 <esi:remove> not open");
	} else {
		/* <esi:remove>: legal only when no section is open */
		if (vep->remove)
			vep_error(vep, "ESI 1.0 <esi:remove> already open");
		else
			vep->remove = 1;
	}
}
Example #2
0
vep_do_comment(struct vep_state *vep, enum dowhat what)
{
	/*
	 * Tag handler for <esi:comment>.
	 *
	 * Only invoked for DO_TAG (asserted).  The element produces no
	 * output of its own; the only check made here is that it was
	 * written in the self-closing form.
	 */
	Debug("DO_COMMENT(%d)\n", what);
	assert(what == DO_TAG);

	if (vep->emptytag == 0) {
		vep_error(vep, "ESI 1.0 <esi:comment> needs final '/'");
	}
}
Example #3
0
/*
 * Wind up parsing of an object.
 *
 * Releases any attribute/include buffers still held, reports an error if
 * the input ended in a state other than one of the "between tags" states,
 * flushes outstanding marks through the callback and finishes the
 * instruction buffer.
 *
 * Returns the finished instruction VSB when ESI markup was found and the
 * buffer is non-empty; otherwise the buffer is destroyed and NULL is
 * returned.
 */
struct vsb *
VEP_Finish(struct vep_state *vep)
{
	ssize_t vec_len, cb_pos;

	CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);

	/* Drop buffers left over from a tag that was never completed */
	if (vep->include_src != NULL)
		VSB_destroy(&vep->include_src);
	if (vep->attr_vsb != NULL)
		VSB_destroy(&vep->attr_vsb);

	if (!(vep->state == VEP_START ||
	    vep->state == VEP_BOM ||
	    vep->state == VEP_TESTXML ||
	    vep->state == VEP_NOTXML ||
	    vep->state == VEP_NEXTTAG))
		vep_error(vep, "VEP ended inside a tag");

	/* Fold pending bytes into the last mark, then emit what is waiting */
	if (vep->o_pending)
		vep_mark_common(vep, vep->ver_p, vep->last_mark);
	if (vep->o_wait > 0) {
		cb_pos = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN);
		vep_emit_common(vep, cb_pos - vep->o_last, vep->last_mark);
	}
	// NB: We don't account for PAD+SUM+LEN in gzip'ed objects
	(void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH);

	AZ(VSB_finish(vep->vsb));
	vec_len = VSB_len(vep->vsb);
	if (!vep->esi_found || vec_len <= 0) {
		/* Nothing ESI-related to hand back */
		VSB_destroy(&vep->vsb);
		return (NULL);
	}
	return (vep->vsb);
}
Example #4
0
/*
 * Feed one chunk of object body to the ESI parser.
 *
 * vep	parser state; carries the current state, partial tag/attribute
 *	matches and mark bookkeeping from one call to the next.
 * p	start of the chunk.
 * l	length of the chunk, must be > 0 (asserted).
 *
 * The chunk is consumed by a state machine: each pass through the main
 * loop handles whatever vep->state currently is and either advances p,
 * changes state, or both.  Input may be split at any byte, so every state
 * has to cope with running out of data (p == e) and resuming on the next
 * call.  Before returning, whatever part of the chunk has not yet been
 * marked is marked as skip or pending.
 */
void
VEP_Parse(struct vep_state *vep, const char *p, size_t l)
{
	const char *e;
	struct vep_match *vm;
	int i;

	CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
	assert(l > 0);

	if (vep->startup) {
		/*
		 * We must force the GZIP header out as a SKIP string,
		 * otherwise an object starting with <esi:include would
		 * have its GZIP header appear after the included object
		 * (e000026.vtc)
		 */
		vep->ver_p = "";
		vep->last_mark = SKIP;
		vep_mark_common(vep, vep->ver_p, VERBATIM);
		vep->startup = 0;
		AZ(vep->hack_p);
		vep->hack_p = p;
	}

	/* Nothing in this chunk has been marked yet */
	vep->ver_p = p;

	e = p + l;

	while (p < e) {
		AN(vep->state);
		/* Debug preview of the input is capped at 10 bytes */
		i = e - p;
		if (i > 10)
			i = 10;
		Debug("EP %s %d (%.*s) [%.*s]\n",
		    vep->state,
		    vep->remove,
		    vep->tag_i, vep->tag,
		    i, p);
		assert(p >= vep->ver_p);

		/******************************************************
		 * SECTION A
		 */

		if (vep->state == VEP_START) {
			/*
			 * Very first byte: optionally try to match a BOM,
			 * otherwise go straight on.
			 */
			if (FEATURE(FEATURE_ESI_REMOVE_BOM) && *p == '\xeb') {
				vep->match = vep_match_bom;
				vep->state = VEP_MATCH;
			} else
				vep->state = VEP_BOM;
		} else if (vep->state == VEP_BOM) {
			/* Anything consumed so far is marked as skip */
			vep_mark_skip(vep, p);
			if (FEATURE(FEATURE_ESI_DISABLE_XML_CHECK))
				vep->state = VEP_NEXTTAG;
			else
				vep->state = VEP_TESTXML;
		} else if (vep->state == VEP_TESTXML) {
			/*
			 * If the first non-whitespace char is different
			 * from '<' we assume this is not XML.
			 */
			while (p < e && vct_islws(*p))
				p++;
			vep_mark_verbatim(vep, p);
			/*
			 * If the chunk held only whitespace, none of the
			 * branches below fire and we stay in this state
			 * until more input arrives.
			 */
			if (p < e && *p == '<') {
				p++;
				vep->state = VEP_STARTTAG;
			} else if (p < e && *p == '\xeb') {
				VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
				    "No ESI processing, "
				    "first char not '<' but BOM."
				    " (See feature esi_remove_bom)"
				);
				vep->state = VEP_NOTXML;
			} else if (p < e) {
				VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
				    "No ESI processing, "
				    "first char not '<'."
				    " (See feature esi_disable_xml_check)"
				);
				vep->state = VEP_NOTXML;
			}
		} else if (vep->state == VEP_NOTXML) {
			/*
			 * This is not recognized as XML, just skip thru
			 * vfp_esi_end() will handle the rest
			 */
			p = e;
			vep_mark_verbatim(vep, p);

		/******************************************************
		 * SECTION B
		 */

		} else if (vep->state == VEP_NOTMYTAG) {
			/*
			 * A tag we have no interest in.  Either step one
			 * byte and resume hunting right away, or consume
			 * everything up to and including the closing '>'.
			 */
			if (FEATURE(FEATURE_ESI_IGNORE_OTHER_ELEMENTS)) {
				p++;
				vep->state = VEP_NEXTTAG;
			} else {
				vep->tag_i = 0;
				while (p < e) {
					if (*p++ == '>') {
						vep->state = VEP_NEXTTAG;
						break;
					}
				}
			}
			if (p == e && !vep->remove)
				vep_mark_verbatim(vep, p);
		} else if (vep->state == VEP_NEXTTAG) {
			/*
			 * Hunt for start of next tag and keep an eye
			 * out for end of EsiCmt if armed.
			 */
			vep->emptytag = 0;
			vep->endtag = 0;
			vep->attr = NULL;
			vep->dostuff = NULL;
			while (p < e && *p != '<') {
				/* Not inside an <!--esi comment: plain scan */
				if (vep->esicmt_p == NULL) {
					p++;
					continue;
				}
				/* Mismatch: restart matching the terminator */
				if (*p != *vep->esicmt_p) {
					p++;
					vep->esicmt_p = vep->esicmt;
					continue;
				}
				/*
				 * First byte of a possible terminator: what
				 * came before it is verbatim output.
				 */
				if (!vep->remove &&
				    vep->esicmt_p == vep->esicmt)
					vep_mark_verbatim(vep, p);
				p++;
				if (*++vep->esicmt_p == '\0') {
					vep->esi_found = 1;
					vep->esicmt = NULL;
					vep->esicmt_p = NULL;
					/*
					 * The end of the esicmt
					 * should not be emitted.
					 * But the stuff before should
					 */
					vep_mark_skip(vep, p);
				}
			}
			if (p < e) {
				/* Stopped on '<': a tag starts here */
				if (!vep->remove)
					vep_mark_verbatim(vep, p);
				assert(*p == '<');
				p++;
				vep->state = VEP_STARTTAG;
			} else if (vep->esicmt_p == vep->esicmt && !vep->remove)
				vep_mark_verbatim(vep, p);

		/******************************************************
		 * SECTION C
		 */

		} else if (vep->state == VEP_STARTTAG) {
			/*
			 * Start of tag, set up match table
			 */
			if (p < e) {
				/* A leading '/' makes this an end-tag */
				if (*p == '/') {
					vep->endtag = 1;
					p++;
				}
				vep->match = vep_match_starttag;
				vep->state = VEP_MATCH;
			}
		} else if (vep->state == VEP_COMMENT) {
			/*
			 * We are in a comment, find out if it is an
			 * ESI comment or a regular comment
			 */
			if (vep->esicmt == NULL)
				vep->esicmt_p = vep->esicmt = "esi";
			while (p < e) {
				if (*p != *vep->esicmt_p) {
					/* Regular comment: skip to "-->" */
					vep->esicmt_p = vep->esicmt = NULL;
					vep->until_p = vep->until = "-->";
					vep->until_s = VEP_NEXTTAG;
					vep->state = VEP_UNTIL;
					break;
				}
				p++;
				if (*++vep->esicmt_p != '\0')
					continue;
				/*
				 * Saw all of "esi": arm the NEXTTAG scanner
				 * to look for the closing "-->".
				 */
				if (vep->remove)
					vep_error(vep,
					    "ESI 1.0 Nested <!--esi"
					    " element in <esi:remove>");
				vep->esicmt_p = vep->esicmt = "-->";
				vep->state = VEP_NEXTTAG;
				vep_mark_skip(vep, p);
				break;
			}
		} else if (vep->state == VEP_CDATA) {
			/*
			 * Easy: just look for the end of CDATA
			 */
			vep->until_p = vep->until = "]]>";
			vep->until_s = VEP_NEXTTAG;
			vep->state = VEP_UNTIL;
		} else if (vep->state == VEP_ESITAG) {
			/*
			 * Start of an <esi: element: nothing of it is
			 * emitted, and we go on to match which one it is.
			 */
			vep->in_esi_tag = 1;
			vep->esi_found = 1;
			vep_mark_skip(vep, p);
			vep->match = vep_match_esi;
			vep->state = VEP_MATCH;
		} else if (vep->state == VEP_ESIINCLUDE) {
			if (vep->remove) {
				vep_error(vep,
				    "ESI 1.0 <esi:include> element"
				    " nested in <esi:remove>");
				vep->state = VEP_TAGERROR;
			} else if (vep->endtag) {
				vep_error(vep,
				    "ESI 1.0 </esi:include> illegal end-tag");
				vep->state = VEP_TAGERROR;
			} else {
				vep->dostuff = vep_do_include;
				vep->state = VEP_INTAG;
				vep->attr = vep_match_attr_include;
			}
		} else if (vep->state == VEP_ESIREMOVE) {
			/* Open/close validation happens in the handler */
			vep->dostuff = vep_do_remove;
			vep->state = VEP_INTAG;
		} else if (vep->state == VEP_ESICOMMENT) {
			if (vep->remove) {
				vep_error(vep,
				    "ESI 1.0 <esi:comment> element"
				    " nested in <esi:remove>");
				vep->state = VEP_TAGERROR;
			} else if (vep->endtag) {
				vep_error(vep,
				    "ESI 1.0 </esi:comment> illegal end-tag");
				vep->state = VEP_TAGERROR;
			} else {
				vep->dostuff = vep_do_comment;
				vep->state = VEP_INTAG;
			}
		} else if (vep->state == VEP_ESIBOGON) {
			vep_error(vep,
			    "ESI 1.0 <esi:bogus> element");
			vep->state = VEP_TAGERROR;

		/******************************************************
		 * SECTION D
		 */

		} else if (vep->state == VEP_INTAG) {
			/*
			 * Inside an ESI tag, between attributes.
			 * Whitespace permits an attribute to follow, '/'
			 * flags the empty-element form, '>' ends the tag
			 * and runs the tag handler.
			 */
			vep->tag_i = 0;
			while (p < e && vct_islws(*p) && !vep->emptytag) {
				p++;
				vep->canattr = 1;
			}
			if (p < e && *p == '/' && !vep->emptytag) {
				p++;
				vep->emptytag = 1;
				vep->canattr = 0;
			}
			if (p < e && *p == '>') {
				p++;
				AN(vep->dostuff);
				/* The tag itself is never emitted */
				vep_mark_skip(vep, p);
				vep->dostuff(vep, DO_TAG);
				vep->in_esi_tag = 0;
				vep->state = VEP_NEXTTAG;
			} else if (p < e && vep->emptytag) {
				vep_error(vep,
				    "XML 1.0 '>' does not follow '/' in tag");
				vep->state = VEP_TAGERROR;
			} else if (p < e && vep->canattr &&
			    vct_isxmlnamestart(*p)) {
				vep->state = VEP_ATTR;
			} else if (p < e) {
				vep_error(vep,
				    "XML 1.0 Illegal attribute start char");
				vep->state = VEP_TAGERROR;
			}
		} else if (vep->state == VEP_TAGERROR) {
			/* Discard the rest of the broken tag, up to '>' */
			while (p < e && *p != '>')
				p++;
			if (p < e) {
				p++;
				vep_mark_skip(vep, p);
				vep->in_esi_tag = 0;
				vep->state = VEP_NEXTTAG;
			}

		/******************************************************
		 * SECTION E
		 */

		} else if (vep->state == VEP_ATTR) {
			AZ(vep->attr_delim);
			if (vep->attr == NULL) {
				/* No attribute table for this tag: skip it */
				p++;
				AZ(vep->attr_vsb);
				vep->state = VEP_SKIPATTR;
			} else {
				vep->match = vep->attr;
				vep->state = VEP_MATCH;
			}
		} else if (vep->state == VEP_SKIPATTR) {
			/* Step over the name of an attribute we ignore */
			while (p < e && vct_isxmlname(*p))
				p++;
			if (p < e && *p == '=') {
				p++;
				vep->state = VEP_ATTRDELIM;
			} else if (p < e && *p == '>') {
				vep->state = VEP_INTAG;
			} else if (p < e && *p == '/') {
				vep->state = VEP_INTAG;
			} else if (p < e && vct_issp(*p)) {
				vep->state = VEP_INTAG;
			} else if (p < e) {
				vep_error(vep,
				    "XML 1.0 Illegal attr char");
				vep->state = VEP_TAGERROR;
			}
		} else if (vep->state == VEP_ATTRGETVAL) {
			/* An attribute whose value we want: collect it */
			vep->attr_vsb = VSB_new_auto();
			vep->state = VEP_ATTRDELIM;
		} else if (vep->state == VEP_ATTRDELIM) {
			AZ(vep->attr_delim);
			if (*p == '"' || *p == '\'') {
				vep->attr_delim = *p++;
				vep->state = VEP_ATTRVAL;
			} else if (!vct_issp(*p)) {
				/*
				 * Unquoted value: ' ' as delimiter means
				 * "ends at the first whitespace char".
				 */
				vep->attr_delim = ' ';
				vep->state = VEP_ATTRVAL;
			} else {
				vep_error(vep,
				    "XML 1.0 Illegal attribute delimiter");
				vep->state = VEP_TAGERROR;
			}

		} else if (vep->state == VEP_ATTRVAL) {
			/*
			 * Consume the value up to the delimiter (or '>'),
			 * copying it to attr_vsb if we are collecting.
			 */
			while (p < e && *p != '>' && *p != vep->attr_delim &&
			   (vep->attr_delim != ' ' || !vct_issp(*p))) {
				if (vep->attr_vsb != NULL)
					VSB_bcat(vep->attr_vsb, p, 1);
				p++;
			}
			if (p < e && *p == '>') {
				vep_error(vep,
				    "XML 1.0 Missing end attribute delimiter");
				vep->state = VEP_TAGERROR;
				vep->attr_delim = 0;
				if (vep->attr_vsb != NULL) {
					AZ(VSB_finish(vep->attr_vsb));
					VSB_delete(vep->attr_vsb);
					vep->attr_vsb = NULL;
				}
			} else if (p < e) {
				vep->attr_delim = 0;
				p++;
				/*
				 * State is set before the handler runs, so
				 * the handler may override it.
				 */
				vep->state = VEP_INTAG;
				if (vep->attr_vsb != NULL) {
					AZ(VSB_finish(vep->attr_vsb));
					AN(vep->dostuff);
					vep->dostuff(vep, DO_ATTR);
					/*
					 * NOTE(review): the handler is
					 * presumably expected to take over
					 * or free the buffer; only the
					 * pointer is cleared here.
					 */
					vep->attr_vsb = NULL;
				}
			}

		/******************************************************
		 * Utility Section
		 */

		} else if (vep->state == VEP_MATCH) {
			/*
			 * Match against a table
			 */
			vm = vep_match(vep, p, e);
			vep->match_hit = vm;
			if (vm != NULL) {
				if (vm->match != NULL)
					p += strlen(vm->match);
				vep->state = *vm->state;
				vep->match = NULL;
				vep->tag_i = 0;
			} else {
				/*
				 * No verdict with the bytes at hand: stash
				 * them and continue in MATCHBUF on the next
				 * chunk.
				 * NOTE(review): assumes vep_match() only
				 * returns NULL when the remaining input is
				 * short enough to fit in vep->tag - confirm.
				 */
				memcpy(vep->tag, p, e - p);
				vep->tag_i = e - p;
				vep->state = VEP_MATCHBUF;
				p = e;
			}
		} else if (vep->state == VEP_MATCHBUF) {
			/*
			 * Match against a table while split over input
			 * sections.
			 */
			AN(vep->match);
			do {
				if (*p == '>') {
					/*
					 * Tag ended: walk to the table's
					 * terminating entry (match == NULL)
					 * and use that.
					 */
					for (vm = vep->match;
					    vm->match != NULL; vm++)
						continue;
					AZ(vm->match);
				} else {
					/* Add one byte and try again */
					vep->tag[vep->tag_i++] = *p++;
					vm = vep_match(vep,
					    vep->tag, vep->tag + vep->tag_i);
					if (vm && vm->match == NULL) {
						/*
						 * Hit the terminating entry:
						 * give the byte back.
						 */
						vep->tag_i--;
						p--;
					}
				}
			} while (vm == NULL && p < e);
			vep->match_hit = vm;
			if (vm == NULL) {
				assert(p == e);
			} else {
				vep->state = *vm->state;
				vep->match = NULL;
			}
		} else if (vep->state == VEP_UNTIL) {
			/*
			 * Skip until we see magic string
			 */
			/*
			 * On a mismatch the input byte is consumed and the
			 * pattern restarts from its beginning; the byte is
			 * not re-tested against the pattern's first char.
			 */
			while (p < e) {
				if (*p++ != *vep->until_p++) {
					vep->until_p = vep->until;
				} else if (*vep->until_p == '\0') {
					vep->state = vep->until_s;
					break;
				}
			}
			if (p == e && !vep->remove)
				vep_mark_verbatim(vep, p);
		} else {
			Debug("*** Unknown state %s\n", vep->state);
			INCOMPL();
		}
	}
	/*
	 * We must always mark up the storage we got, try to do so
	 * in the most efficient way, in particular with respect to
	 * minimizing and limiting use of pending.
	 */
	if (p == vep->ver_p)
		;
	else if (vep->in_esi_tag)
		vep_mark_skip(vep, p);
	else if (vep->remove)
		vep_mark_skip(vep, p);
	else
		vep_mark_pending(vep, p);
}
Example #5
0
vep_do_include(struct vep_state *vep, enum dowhat what)
{
	const char *p, *q, *h;
	ssize_t l;

	/*
	 * Tag handler for <esi:include>.
	 *
	 * DO_ATTR: called once per collected attribute value.  The value
	 *	buffer (vep->attr_vsb) is kept as vep->include_src; a second
	 *	value is an error and both buffers are released.
	 * DO_TAG: called when the tag closes.  Emits a VEC_INCL record to
	 *	vep->vsb: the VEC_INCL byte, an optional "Host: ..." string,
	 *	a NUL, the entity-decoded URL and a final NUL.  The
	 *	include_src buffer is released on every DO_TAG path that
	 *	had one.
	 *
	 * The src value comes straight from the object being parsed, so a
	 * malformed one must produce a parse error, never an assert.
	 */
	Debug("DO_INCLUDE(%d)\n", what);
	if (what == DO_ATTR) {
		Debug("ATTR (%s) (%s)\n", vep->match_hit->match,
			VSB_data(vep->attr_vsb));
		if (vep->include_src != NULL) {
			vep_error(vep,
			    "ESI 1.0 <esi:include> "
			    "has multiple src= attributes");
			vep->state = VEP_TAGERROR;
			VSB_delete(vep->attr_vsb);
			VSB_delete(vep->include_src);
			vep->attr_vsb = NULL;
			vep->include_src = NULL;
			return;
		}
		vep->include_src = vep->attr_vsb;
		return;
	}
	assert(what == DO_TAG);
	if (!vep->emptytag)
		vep_warn(vep,
		    "ESI 1.0 <esi:include> lacks final '/'");
	if (vep->include_src == NULL) {
		vep_error(vep,
		    "ESI 1.0 <esi:include> lacks src attr");
		return;
	}

	/*
	 * Strictly speaking, we ought to spit out any piled up skip before
	 * emitting the VEC for the include, but objectively that makes no
	 * difference and robs us of a chance to collapse another skip into
	 * this one so we don't do that.
	 * However, we cannot tolerate any verbatim stuff piling up.
	 * The mark_skip() before calling dostuff should have taken
	 * care of that.  Make sure.
	 */
	assert(vep->o_wait == 0 || vep->last_mark == SKIP);
	/* XXX: what if it contains NUL bytes ?? */
	p = VSB_data(vep->include_src);
	l = VSB_len(vep->include_src);

	/* h is set to the start of the host part for absolute URLs only */
	h = NULL;
	if (l > 7 && !memcmp(p, "http://", 7)) {
		h = p + 7;
	} else if (l > 8 && !memcmp(p, "https://", 8)) {
		if (!FEATURE(FEATURE_ESI_IGNORE_HTTPS)) {
			vep_warn(vep,
			    "ESI 1.0 <esi:include> with https:// ignored");
			vep->state = VEP_TAGERROR;
			/* Release the src buffer, don't just forget it */
			VSB_delete(vep->include_src);
			vep->attr_vsb = NULL;
			vep->include_src = NULL;
			return;
		}
		vep_warn(vep,
		    "ESI 1.0 <esi:include> https:// treated as http://");
		h = p + 8;
	}

	if (h != NULL) {
		/* Absolute URL: split into host and path at the first '/' */
		p = strchr(h, '/');
		if (p == NULL) {
			/* e.g. src="http://host" - no path to request */
			vep_error(vep,
			    "ESI 1.0 <esi:include> invalid src= URL");
			vep->state = VEP_TAGERROR;
			VSB_delete(vep->include_src);
			vep->attr_vsb = NULL;
			vep->include_src = NULL;
			return;
		}
		Debug("HOST <%.*s> PATH <%s>\n", (int)(p-h),h, p);
		VSB_printf(vep->vsb, "%c", VEC_INCL);
		VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0);
	} else if (*p == '/') {
		/* Host-relative URL: empty host string */
		VSB_printf(vep->vsb, "%c", VEC_INCL);
		VSB_printf(vep->vsb, "%c", 0);
	} else {
		/* Relative URL: prefix with the directory of our own URL */
		VSB_printf(vep->vsb, "%c", VEC_INCL);
		VSB_printf(vep->vsb, "%c", 0);
		/* Look for the last / before a '?' */
		h = NULL;
		for (q = vep->url; *q && *q != '?'; q++)
			if (*q == '/')
				h = q;
		/*
		 * NOTE(review): with no '/' in vep->url this points one
		 * past where the scan stopped; kept as in the original.
		 */
		if (h == NULL)
			h = q + 1;

		Debug("INCL:: [%.*s]/[%s]\n",
		    (int)(h - vep->url), vep->url, p);
		VSB_printf(vep->vsb, "%.*s/", (int)(h - vep->url), vep->url);
	}

	/* l becomes the length of what is left from p onwards */
	l -= (p - VSB_data(vep->include_src));

	/* Copy the URL, decoding the five predefined XML entities */
	for (q = p; *q != '\0'; ) {
		if (*q == '&') {
#define R(w,f,r)							\
			if (q + w <= p + l && !memcmp(q, f, w)) { \
				VSB_printf(vep->vsb, "%c", r);	\
				q += w;				\
				continue;			\
			}
			R(6, "&apos;", '\'');
			R(6, "&quot;", '"');
			R(4, "&lt;", '<');
			R(4, "&gt;", '>');
			R(5, "&amp;", '&');
		}
		VSB_printf(vep->vsb, "%c", *q++);
	}
#undef R
	VSB_printf(vep->vsb, "%c", 0);

	VSB_delete(vep->include_src);
	vep->include_src = NULL;
}