Пример #1
0
/*
 * Place a SKIP mark at input position p and count it in nm_skip.
 */
static void
vep_mark_skip(struct vep_state *vep, const char *p)
{
	vep_mark_common(vep, p, SKIP);
	vep->nm_skip += 1;
}
/*
 * Finish the ESI parse attached to the session's worker.
 *
 * Flushes a pending mark (if o_pending is set), emits whatever is still
 * waiting (o_wait > 0) using the length reported by the callback for
 * VGZ_ALIGN, then calls the callback once more with VGZ_FINISH.  The
 * parser state is detached from sp->wrk before the vsb is finished.
 *
 * Returns the finished vsb when ESI markup was found and the vsb is not
 * empty; otherwise the vsb is deleted and NULL is returned.
 */
struct vsb *
VEP_Finish(const struct sess *sp)
{
	struct vep_state *vep;
	ssize_t used, aligned;

	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
	vep = sp->wrk->vep;
	CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);

	if (vep->o_pending)
		vep_mark_common(vep, vep->ver_p, vep->last_mark);

	if (vep->o_wait > 0) {
		aligned = vep->cb(vep->sp, 0, VGZ_ALIGN);
		vep_emit_common(vep, aligned - vep->o_last, vep->last_mark);
	}

	/* The return value of the final callback is deliberately unused */
	(void)vep->cb(vep->sp, 0, VGZ_FINISH);

	sp->wrk->vep = NULL;

	AZ(VSB_finish(vep->vsb));
	used = VSB_len(vep->vsb);
	if (!vep->esi_found || used <= 0) {
		/* Nothing worth keeping */
		VSB_delete(vep->vsb);
		return (NULL);
	}
	return (vep->vsb);
}
Пример #3
0
/*
 * Place a VERBATIM mark at input position p and count it in
 * nm_verbatim.
 */
static void
vep_mark_verbatim(struct vep_state *vep, const char *p)
{
	vep_mark_common(vep, p, VERBATIM);
	vep->nm_verbatim += 1;
}
Пример #4
0
/*
 * Finish the ESI parse described by vep.
 *
 * No include source and no attribute vsb may be outstanding (both are
 * asserted to be unset).  Flushes a pending mark, emits whatever is
 * still waiting using the length the callback reports for VGZ_ALIGN,
 * then calls the callback once more with VGZ_FINISH.
 *
 * Returns the finished vsb when ESI markup was found and the vsb is not
 * empty; otherwise the vsb is destroyed (vep->vsb is handed to
 * VSB_destroy() by address) and NULL is returned.
 */
struct vsb *
VEP_Finish(struct vep_state *vep)
{
	ssize_t used, aligned;

	CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);

	AZ(vep->include_src);
	AZ(vep->attr_vsb);

	if (vep->o_pending)
		vep_mark_common(vep, vep->ver_p, vep->last_mark);

	if (vep->o_wait > 0) {
		aligned = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN);
		vep_emit_common(vep, aligned - vep->o_last, vep->last_mark);
	}

	// NB: We don't account for PAD+SUM+LEN in gzip'ed objects
	(void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH);

	AZ(VSB_finish(vep->vsb));
	used = VSB_len(vep->vsb);
	if (!vep->esi_found || used <= 0) {
		/* Nothing worth keeping */
		VSB_destroy(&vep->vsb);
		return (NULL);
	}
	return (vep->vsb);
}
Пример #5
0
/*
 * Finish the ESI parse attached to the worker's busyobj.
 *
 * Flushes a pending mark, emits whatever is still waiting using the
 * length the callback reports for VGZ_ALIGN, then calls the callback
 * once more with VGZ_FINISH.  The parser state is detached from the
 * busyobj before the vsb is finished.
 *
 * Returns the finished vsb when ESI markup was found and the vsb is not
 * empty; otherwise the vsb is deleted and NULL is returned.
 */
struct vsb *
VEP_Finish(const struct worker *wrk)
{
	struct vep_state *vep;
	ssize_t used, aligned;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(wrk->busyobj, BUSYOBJ_MAGIC);
	vep = wrk->busyobj->vep;
	CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);

	if (vep->o_pending)
		vep_mark_common(vep, vep->ver_p, vep->last_mark);

	if (vep->o_wait > 0) {
		aligned = vep->cb(vep->wrk, 0, VGZ_ALIGN);
		vep_emit_common(vep, aligned - vep->o_last, vep->last_mark);
	}

	/* The return value of the final callback is deliberately unused */
	(void)vep->cb(vep->wrk, 0, VGZ_FINISH);

	wrk->busyobj->vep = NULL;

	AZ(VSB_finish(vep->vsb));
	used = VSB_len(vep->vsb);
	if (!vep->esi_found || used <= 0) {
		/* Nothing worth keeping */
		VSB_delete(vep->vsb);
		return (NULL);
	}
	return (vep->vsb);
}
Пример #6
0
/*
 * Finish the ESI parse attached to busyobj bo.
 *
 * The parser state must point back at bo (asserted).  Flushes a pending
 * mark, emits whatever is still waiting using the length the callback
 * reports for VGZ_ALIGN, then calls the callback once more with
 * VGZ_FINISH.  The parser state is detached from bo before the vsb is
 * finished.
 *
 * Returns the finished vsb when ESI markup was found and the vsb is not
 * empty; otherwise the vsb is deleted and NULL is returned.
 */
struct vsb *
VEP_Finish(struct busyobj *bo)
{
	struct vep_state *vep;
	ssize_t used, aligned;

	CHECK_OBJ_NOTNULL(bo, BUSYOBJ_MAGIC);
	vep = bo->vep;
	CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
	assert(vep->bo == bo);

	if (vep->o_pending)
		vep_mark_common(vep, vep->ver_p, vep->last_mark);

	if (vep->o_wait > 0) {
		aligned = vep->cb(vep->bo, 0, VGZ_ALIGN);
		vep_emit_common(vep, aligned - vep->o_last, vep->last_mark);
	}

	/* The return value of the final callback is deliberately unused */
	(void)vep->cb(vep->bo, 0, VGZ_FINISH);

	bo->vep = NULL;

	AZ(VSB_finish(vep->vsb));
	used = VSB_len(vep->vsb);
	if (!vep->esi_found || used <= 0) {
		/* Nothing worth keeping */
		VSB_delete(vep->vsb);
		return (NULL);
	}
	return (vep->vsb);
}
Пример #7
0
/*
 * Create a fresh ESI parser state for the worker's busyobj.
 *
 * The state is allocated from wrk->ws, zeroed, given a new auto-sized
 * vsb and attached to wrk->busyobj (which must not already have one).
 *
 * cb: callback to use.  When non-NULL, dogzip is set and a VEC_GZ byte
 *     is written at the start of the vsb; when NULL, vep_default_cb is
 *     used instead.
 */
void
VEP_Init(struct worker *wrk, vep_callback_t *cb)
{
	struct vep_state *vep;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(wrk->busyobj, BUSYOBJ_MAGIC);
	AZ(wrk->busyobj->vep);

	vep = (void*)WS_Alloc(wrk->ws, sizeof(*vep));
	AN(vep);
	memset(vep, 0, sizeof(*vep));

	vep->magic = VEP_MAGIC;
	vep->wrk = wrk;
	vep->vsb = VSB_new_auto();
	AN(vep->vsb);
	wrk->busyobj->vep = vep;

	if (cb == NULL) {
		vep->cb = vep_default_cb;
	} else {
		vep->dogzip = 1;
		/* XXX */
		VSB_printf(vep->vsb, "%c", VEC_GZ);
		vep->cb = cb;
	}

	vep->state = VEP_START;
	vep->crc = crc32(0L, Z_NULL, 0);
	vep->crcp = crc32(0L, Z_NULL, 0);

	/*
	 * The GZIP header has to go out as a SKIP string.  If it did
	 * not, an object that begins with <esi:include would get its
	 * GZIP header after the included object (e000026.vtc).
	 * The startup flag is raised only around this one mark.
	 */
	vep->startup = 1;
	vep->ver_p = "";
	vep->last_mark = SKIP;
	vep_mark_common(vep, vep->ver_p, VERBATIM);
	vep->startup = 0;
}
Пример #8
0
/*
 * Finish the ESI parse described by vep.
 *
 * Flushes a pending mark (if o_pending is set), emits whatever is still
 * waiting (o_wait > 0) using the length the callback reports for
 * VGZ_ALIGN, then calls the callback once more with VGZ_FINISH.
 *
 * Returns the finished vsb when ESI markup was found and the vsb is not
 * empty.  Otherwise the vsb is deleted, vep->vsb is reset to NULL and
 * NULL is returned.
 */
struct vsb *
VEP_Finish(struct vep_state *vep)
{
	ssize_t l, lcb;

	CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);

	if (vep->o_pending)
		vep_mark_common(vep, vep->ver_p, vep->last_mark);
	if (vep->o_wait > 0) {
		lcb = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN);
		vep_emit_common(vep, lcb - vep->o_last, vep->last_mark);
	}
	/* The return value of the final callback is deliberately unused */
	(void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH);

	AZ(VSB_finish(vep->vsb));
	l = VSB_len(vep->vsb);
	if (vep->esi_found && l > 0)
		return (vep->vsb);
	VSB_delete(vep->vsb);
	/*
	 * The caller still holds vep, so do not leave a pointer to the
	 * deleted vsb behind in it.
	 */
	vep->vsb = NULL;
	return (NULL);
}
Пример #9
0
/*
 * Run the next chunk of input, the l bytes starting at p, through the
 * ESI parser state machine.
 *
 * All parser state lives in vep and survives between calls, so a
 * construct may be split across several chunks.  Each pass of the main
 * loop dispatches on vep->state; a state consumes input (advances p),
 * switches to another state, or both.  While parsing, input positions
 * are marked via vep_mark_verbatim() and vep_mark_skip(); whatever is
 * left unmarked when the chunk is exhausted is marked after the loop.
 *
 * l must be greater than zero (asserted).
 */
void
VEP_Parse(struct vep_state *vep, const char *p, size_t l)
{
	const char *e;
	struct vep_match *vm;
	int i;

	CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
	assert(l > 0);

	if (vep->startup) {
		/*
		 * We must force the GZIP header out as a SKIP string,
		 * otherwise an object starting with <esi:include would
		 * have its GZIP header appear after the included object
		 * (e000026.vtc)
		 */
		vep->ver_p = "";
		vep->last_mark = SKIP;
		vep_mark_common(vep, vep->ver_p, VERBATIM);
		vep->startup = 0;
		/* Remember where the first chunk starts (must be unset) */
		AZ(vep->hack_p);
		vep->hack_p = p;
	}

	/* Start of this chunk; compared against p again after the loop */
	vep->ver_p = p;

	/* e points one past the last byte of this chunk */
	e = p + l;

	while (p < e) {
		AN(vep->state);
		/* Show at most 10 bytes of upcoming input in the trace */
		i = e - p;
		if (i > 10)
			i = 10;
		Debug("EP %s %d (%.*s) [%.*s]\n",
		    vep->state,
		    vep->remove,
		    vep->tag_i, vep->tag,
		    i, p);
		assert(p >= vep->ver_p);

		/******************************************************
		 * SECTION A
		 */

		if (vep->state == VEP_START) {
			/*
			 * NOTE(review): a UTF-8 BOM is EF BB BF, but the
			 * byte tested here is 0xEB -- confirm against the
			 * vep_match_bom table (not visible here).
			 */
			if (FEATURE(FEATURE_ESI_REMOVE_BOM) && *p == '\xeb') {
				vep->match = vep_match_bom;
				vep->state = VEP_MATCH;
			} else
				vep->state = VEP_BOM;
		} else if (vep->state == VEP_BOM) {
			vep_mark_skip(vep, p);
			if (FEATURE(FEATURE_ESI_DISABLE_XML_CHECK))
				vep->state = VEP_NEXTTAG;
			else
				vep->state = VEP_TESTXML;
		} else if (vep->state == VEP_TESTXML) {
			/*
			 * If the first non-whitespace char is different
			 * from '<' we assume this is not XML.
			 */
			while (p < e && vct_islws(*p))
				p++;
			vep_mark_verbatim(vep, p);
			/*
			 * If the chunk held only whitespace (p == e) none
			 * of the branches below fire and the state stays
			 * VEP_TESTXML for the next chunk.
			 */
			if (p < e && *p == '<') {
				p++;
				vep->state = VEP_STARTTAG;
			} else if (p < e && *p == '\xeb') {
				/* Same 0xEB byte as tested in VEP_START */
				VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
				    "No ESI processing, "
				    "first char not '<' but BOM."
				    " (See feature esi_remove_bom)"
				);
				vep->state = VEP_NOTXML;
			} else if (p < e) {
				VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
				    "No ESI processing, "
				    "first char not '<'."
				    " (See feature esi_disable_xml_check)"
				);
				vep->state = VEP_NOTXML;
			}
		} else if (vep->state == VEP_NOTXML) {
			/*
			 * This is not recognized as XML, just skip thru
			 * vfp_esi_end() will handle the rest
			 */
			p = e;
			vep_mark_verbatim(vep, p);

		/******************************************************
		 * SECTION B
		 */

		} else if (vep->state == VEP_NOTMYTAG) {
			/*
			 * With the ignore-other-elements feature only one
			 * byte is stepped over before hunting for the next
			 * tag; otherwise input is consumed up to and
			 * including the closing '>'.
			 */
			if (FEATURE(FEATURE_ESI_IGNORE_OTHER_ELEMENTS)) {
				p++;
				vep->state = VEP_NEXTTAG;
			} else {
				vep->tag_i = 0;
				while (p < e) {
					if (*p++ == '>') {
						vep->state = VEP_NEXTTAG;
						break;
					}
				}
			}
			if (p == e && !vep->remove)
				vep_mark_verbatim(vep, p);
		} else if (vep->state == VEP_NEXTTAG) {
			/*
			 * Hunt for start of next tag and keep an eye
			 * out for end of EsiCmt if armed.
			 */
			vep->emptytag = 0;
			vep->endtag = 0;
			vep->attr = NULL;
			vep->dostuff = NULL;
			/*
			 * esicmt is the armed terminator string and
			 * esicmt_p the next char of it to match; NULL
			 * means nothing is armed.
			 */
			while (p < e && *p != '<') {
				if (vep->esicmt_p == NULL) {
					p++;
					continue;
				}
				if (*p != *vep->esicmt_p) {
					/* Mismatch: restart the terminator */
					p++;
					vep->esicmt_p = vep->esicmt;
					continue;
				}
				/* First char of the terminator matched */
				if (!vep->remove &&
				    vep->esicmt_p == vep->esicmt)
					vep_mark_verbatim(vep, p);
				p++;
				if (*++vep->esicmt_p == '\0') {
					/* Whole terminator seen: disarm */
					vep->esi_found = 1;
					vep->esicmt = NULL;
					vep->esicmt_p = NULL;
					/*
					 * The end of the esicmt
					 * should not be emitted.
					 * But the stuff before should
					 */
					vep_mark_skip(vep, p);
				}
			}
			if (p < e) {
				/* The loop stopped on a '<' */
				if (!vep->remove)
					vep_mark_verbatim(vep, p);
				assert(*p == '<');
				p++;
				vep->state = VEP_STARTTAG;
			} else if (vep->esicmt_p == vep->esicmt && !vep->remove)
				vep_mark_verbatim(vep, p);

		/******************************************************
		 * SECTION C
		 */

		} else if (vep->state == VEP_STARTTAG) {
			/*
			 * Start of tag, set up match table
			 */
			if (p < e) {
				/* A leading '/' makes this an end-tag */
				if (*p == '/') {
					vep->endtag = 1;
					p++;
				}
				vep->match = vep_match_starttag;
				vep->state = VEP_MATCH;
			}
		} else if (vep->state == VEP_COMMENT) {
			/*
			 * We are in a comment, find out if it is an
			 * ESI comment or a regular comment
			 */
			if (vep->esicmt == NULL)
				vep->esicmt_p = vep->esicmt = "esi";
			while (p < e) {
				if (*p != *vep->esicmt_p) {
					/*
					 * Does not spell "esi": skip
					 * everything up to "-->".
					 */
					vep->esicmt_p = vep->esicmt = NULL;
					vep->until_p = vep->until = "-->";
					vep->until_s = VEP_NEXTTAG;
					vep->state = VEP_UNTIL;
					break;
				}
				p++;
				if (*++vep->esicmt_p != '\0')
					continue;
				/* All of "esi" matched */
				if (vep->remove)
					vep_error(vep,
					    "ESI 1.0 Nested <!--esi"
					    " element in <esi:remove>");
				/* Arm "-->" as terminator for VEP_NEXTTAG */
				vep->esicmt_p = vep->esicmt = "-->";
				vep->state = VEP_NEXTTAG;
				vep_mark_skip(vep, p);
				break;
			}
		} else if (vep->state == VEP_CDATA) {
			/*
			 * Easy: just look for the end of CDATA
			 */
			vep->until_p = vep->until = "]]>";
			vep->until_s = VEP_NEXTTAG;
			vep->state = VEP_UNTIL;
		} else if (vep->state == VEP_ESITAG) {
			vep->in_esi_tag = 1;
			vep->esi_found = 1;
			vep_mark_skip(vep, p);
			vep->match = vep_match_esi;
			vep->state = VEP_MATCH;
		} else if (vep->state == VEP_ESIINCLUDE) {
			if (vep->remove) {
				vep_error(vep,
				    "ESI 1.0 <esi:include> element"
				    " nested in <esi:remove>");
				vep->state = VEP_TAGERROR;
			} else if (vep->endtag) {
				vep_error(vep,
				    "ESI 1.0 </esi:include> illegal end-tag");
				vep->state = VEP_TAGERROR;
			} else {
				vep->dostuff = vep_do_include;
				vep->state = VEP_INTAG;
				vep->attr = vep_match_attr_include;
			}
		} else if (vep->state == VEP_ESIREMOVE) {
			vep->dostuff = vep_do_remove;
			vep->state = VEP_INTAG;
		} else if (vep->state == VEP_ESICOMMENT) {
			if (vep->remove) {
				vep_error(vep,
				    "ESI 1.0 <esi:comment> element"
				    " nested in <esi:remove>");
				vep->state = VEP_TAGERROR;
			} else if (vep->endtag) {
				vep_error(vep,
				    "ESI 1.0 </esi:comment> illegal end-tag");
				vep->state = VEP_TAGERROR;
			} else {
				vep->dostuff = vep_do_comment;
				vep->state = VEP_INTAG;
			}
		} else if (vep->state == VEP_ESIBOGON) {
			vep_error(vep,
			    "ESI 1.0 <esi:bogus> element");
			vep->state = VEP_TAGERROR;

		/******************************************************
		 * SECTION D
		 */

		} else if (vep->state == VEP_INTAG) {
			vep->tag_i = 0;
			/* Whitespace inside the tag permits an attribute */
			while (p < e && vct_islws(*p) && !vep->emptytag) {
				p++;
				vep->canattr = 1;
			}
			/* After '/' only '>' is accepted */
			if (p < e && *p == '/' && !vep->emptytag) {
				p++;
				vep->emptytag = 1;
				vep->canattr = 0;
			}
			if (p < e && *p == '>') {
				/* End of tag: run the handler for it */
				p++;
				AN(vep->dostuff);
				vep_mark_skip(vep, p);
				vep->dostuff(vep, DO_TAG);
				vep->in_esi_tag = 0;
				vep->state = VEP_NEXTTAG;
			} else if (p < e && vep->emptytag) {
				vep_error(vep,
				    "XML 1.0 '>' does not follow '/' in tag");
				vep->state = VEP_TAGERROR;
			} else if (p < e && vep->canattr &&
			    vct_isxmlnamestart(*p)) {
				vep->state = VEP_ATTR;
			} else if (p < e) {
				vep_error(vep,
				    "XML 1.0 Illegal attribute start char");
				vep->state = VEP_TAGERROR;
			}
		} else if (vep->state == VEP_TAGERROR) {
			/* Discard the rest of the tag, '>' included */
			while (p < e && *p != '>')
				p++;
			if (p < e) {
				p++;
				vep_mark_skip(vep, p);
				vep->in_esi_tag = 0;
				vep->state = VEP_NEXTTAG;
			}

		/******************************************************
		 * SECTION E
		 */

		} else if (vep->state == VEP_ATTR) {
			AZ(vep->attr_delim);
			if (vep->attr == NULL) {
				/* No attribute table: skip over the name */
				p++;
				AZ(vep->attr_vsb);
				vep->state = VEP_SKIPATTR;
			} else {
				vep->match = vep->attr;
				vep->state = VEP_MATCH;
			}
		} else if (vep->state == VEP_SKIPATTR) {
			while (p < e && vct_isxmlname(*p))
				p++;
			if (p < e && *p == '=') {
				p++;
				vep->state = VEP_ATTRDELIM;
			} else if (p < e && *p == '>') {
				vep->state = VEP_INTAG;
			} else if (p < e && *p == '/') {
				vep->state = VEP_INTAG;
			} else if (p < e && vct_issp(*p)) {
				vep->state = VEP_INTAG;
			} else if (p < e) {
				vep_error(vep,
				    "XML 1.0 Illegal attr char");
				vep->state = VEP_TAGERROR;
			}
		} else if (vep->state == VEP_ATTRGETVAL) {
			/* The value is wanted: collect it in a fresh vsb */
			vep->attr_vsb = VSB_new_auto();
			vep->state = VEP_ATTRDELIM;
		} else if (vep->state == VEP_ATTRDELIM) {
			AZ(vep->attr_delim);
			if (*p == '"' || *p == '\'') {
				vep->attr_delim = *p++;
				vep->state = VEP_ATTRVAL;
			} else if (!vct_issp(*p)) {
				/* Unquoted value: ' ' stands in as delimiter */
				vep->attr_delim = ' ';
				vep->state = VEP_ATTRVAL;
			} else {
				vep_error(vep,
				    "XML 1.0 Illegal attribute delimiter");
				vep->state = VEP_TAGERROR;
			}

		} else if (vep->state == VEP_ATTRVAL) {
			/*
			 * Consume the value up to '>' or the delimiter;
			 * with the ' ' delimiter any vct_issp() char ends
			 * it.  Bytes are stored only if attr_vsb is set.
			 */
			while (p < e && *p != '>' && *p != vep->attr_delim &&
			   (vep->attr_delim != ' ' || !vct_issp(*p))) {
				if (vep->attr_vsb != NULL)
					VSB_bcat(vep->attr_vsb, p, 1);
				p++;
			}
			if (p < e && *p == '>') {
				vep_error(vep,
				    "XML 1.0 Missing end attribute delimiter");
				vep->state = VEP_TAGERROR;
				vep->attr_delim = 0;
				if (vep->attr_vsb != NULL) {
					AZ(VSB_finish(vep->attr_vsb));
					VSB_delete(vep->attr_vsb);
					vep->attr_vsb = NULL;
				}
			} else if (p < e) {
				vep->attr_delim = 0;
				p++;
				vep->state = VEP_INTAG;
				if (vep->attr_vsb != NULL) {
					AZ(VSB_finish(vep->attr_vsb));
					AN(vep->dostuff);
					/*
					 * The vsb is not deleted here;
					 * presumably the DO_ATTR handler
					 * takes it over -- verify.
					 */
					vep->dostuff(vep, DO_ATTR);
					vep->attr_vsb = NULL;
				}
			}

		/******************************************************
		 * Utility Section
		 */

		} else if (vep->state == VEP_MATCH) {
			/*
			 * Match against a table
			 */
			vm = vep_match(vep, p, e);
			vep->match_hit = vm;
			if (vm != NULL) {
				/* Step over the matched text, if any */
				if (vm->match != NULL)
					p += strlen(vm->match);
				vep->state = *vm->state;
				vep->match = NULL;
				vep->tag_i = 0;
			} else {
				/*
				 * No entry selected: keep the unconsumed
				 * bytes in vep->tag and carry on in
				 * VEP_MATCHBUF.
				 * NOTE(review): e - p is not checked
				 * against the size of vep->tag here --
				 * verify that vep_match() bounds it.
				 */
				memcpy(vep->tag, p, e - p);
				vep->tag_i = e - p;
				vep->state = VEP_MATCHBUF;
				p = e;
			}
		} else if (vep->state == VEP_MATCHBUF) {
			/*
			 * Match against a table while split over input
			 * sections.
			 */
			AN(vep->match);
			do {
				if (*p == '>') {
					/*
					 * Pick the terminating entry of
					 * the table (match == NULL).
					 */
					for (vm = vep->match;
					    vm->match != NULL; vm++)
						continue;
					AZ(vm->match);
				} else {
					/* Add one byte and match again */
					vep->tag[vep->tag_i++] = *p++;
					vm = vep_match(vep,
					    vep->tag, vep->tag + vep->tag_i);
					if (vm && vm->match == NULL) {
						/*
						 * Terminating entry hit:
						 * give the byte back.
						 */
						vep->tag_i--;
						p--;
					}
				}
			} while (vm == NULL && p < e);
			vep->match_hit = vm;
			if (vm == NULL) {
				assert(p == e);
			} else {
				vep->state = *vm->state;
				vep->match = NULL;
			}
		} else if (vep->state == VEP_UNTIL) {
			/*
			 * Skip until we see magic string
			 */
			/*
			 * NOTE(review): on a mismatch the pattern restarts
			 * but the mismatching byte is already consumed and
			 * is not re-tested against the first pattern char
			 * (e.g. "--->" would not complete "-->") --
			 * confirm this is acceptable.
			 */
			while (p < e) {
				if (*p++ != *vep->until_p++) {
					vep->until_p = vep->until;
				} else if (*vep->until_p == '\0') {
					vep->state = vep->until_s;
					break;
				}
			}
			if (p == e && !vep->remove)
				vep_mark_verbatim(vep, p);
		} else {
			Debug("*** Unknown state %s\n", vep->state);
			INCOMPL();
		}
	}
	/*
	 * We must always mark up the storage we got, try to do so
	 * in the most efficient way, in particular with respect to
	 * minimizing and limiting use of pending.
	 */
	/* Nothing to do when p still equals vep->ver_p */
	if (p == vep->ver_p)
		;
	else if (vep->in_esi_tag)
		vep_mark_skip(vep, p);
	else if (vep->remove)
		vep_mark_skip(vep, p);
	else
		vep_mark_pending(vep, p);
}