/*
 * Hand position 'p' to vep_mark_common() tagged as SKIP, then bump the
 * nm_skip counter on the parser state.
 */
static void
vep_mark_skip(struct vep_state *vep, const char *p)
{

	vep_mark_common(vep, p, SKIP);
	vep->nm_skip += 1;
}
struct vsb * VEP_Finish(const struct sess *sp) { struct vep_state *vep; ssize_t l, lcb; CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); vep = sp->wrk->vep; CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); if (vep->o_pending) vep_mark_common(vep, vep->ver_p, vep->last_mark); if (vep->o_wait > 0) { lcb = vep->cb(vep->sp, 0, VGZ_ALIGN); vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); } (void)vep->cb(vep->sp, 0, VGZ_FINISH); sp->wrk->vep = NULL; AZ(VSB_finish(vep->vsb)); l = VSB_len(vep->vsb); if (vep->esi_found && l > 0) return (vep->vsb); VSB_delete(vep->vsb); return (NULL); }
/*
 * Hand position 'p' to vep_mark_common() tagged as VERBATIM, then bump
 * the nm_verbatim counter on the parser state.
 */
static void
vep_mark_verbatim(struct vep_state *vep, const char *p)
{

	vep_mark_common(vep, p, VERBATIM);
	vep->nm_verbatim += 1;
}
struct vsb * VEP_Finish(struct vep_state *vep) { ssize_t l, lcb; CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); AZ(vep->include_src); AZ(vep->attr_vsb); if (vep->o_pending) vep_mark_common(vep, vep->ver_p, vep->last_mark); if (vep->o_wait > 0) { lcb = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN); vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); } // NB: We don't account for PAD+SUM+LEN in gzip'ed objects (void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH); AZ(VSB_finish(vep->vsb)); l = VSB_len(vep->vsb); if (vep->esi_found && l > 0) return (vep->vsb); VSB_destroy(&vep->vsb); return (NULL); }
struct vsb * VEP_Finish(const struct worker *wrk) { struct vep_state *vep; ssize_t l, lcb; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); CHECK_OBJ_NOTNULL(wrk->busyobj, BUSYOBJ_MAGIC); vep = wrk->busyobj->vep; CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); if (vep->o_pending) vep_mark_common(vep, vep->ver_p, vep->last_mark); if (vep->o_wait > 0) { lcb = vep->cb(vep->wrk, 0, VGZ_ALIGN); vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); } (void)vep->cb(vep->wrk, 0, VGZ_FINISH); wrk->busyobj->vep = NULL; AZ(VSB_finish(vep->vsb)); l = VSB_len(vep->vsb); if (vep->esi_found && l > 0) return (vep->vsb); VSB_delete(vep->vsb); return (NULL); }
struct vsb * VEP_Finish(struct busyobj *bo) { struct vep_state *vep; ssize_t l, lcb; CHECK_OBJ_NOTNULL(bo, BUSYOBJ_MAGIC); vep = bo->vep; CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); assert(vep->bo == bo); if (vep->o_pending) vep_mark_common(vep, vep->ver_p, vep->last_mark); if (vep->o_wait > 0) { lcb = vep->cb(vep->bo, 0, VGZ_ALIGN); vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); } (void)vep->cb(vep->bo, 0, VGZ_FINISH); bo->vep = NULL; AZ(VSB_finish(vep->vsb)); l = VSB_len(vep->vsb); if (vep->esi_found && l > 0) return (vep->vsb); VSB_delete(vep->vsb); return (NULL); }
/*
 * Create the ESI parser state for wrk->busyobj.
 *
 * The state is allocated from wrk->ws with WS_Alloc(), zeroed, given a
 * fresh auto-sized vsb and attached to the busyobj (which must not
 * already carry one).
 *
 * cb:	output callback.  When non-NULL, gzip mode is switched on and a
 *	VEC_GZ byte is written to the new vsb; when NULL,
 *	vep_default_cb is installed instead.
 */
void
VEP_Init(struct worker *wrk, vep_callback_t *cb)
{
	struct vep_state *vep;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(wrk->busyobj, BUSYOBJ_MAGIC);
	AZ(wrk->busyobj->vep);

	vep = (void *)WS_Alloc(wrk->ws, sizeof(*vep));
	AN(vep);
	memset(vep, 0, sizeof(*vep));

	vep->magic = VEP_MAGIC;
	vep->wrk = wrk;
	vep->vsb = VSB_new_auto();
	AN(vep->vsb);
	wrk->busyobj->vep = vep;

	if (cb == NULL) {
		vep->cb = vep_default_cb;
	} else {
		vep->dogzip = 1;
		/* XXX */
		VSB_printf(vep->vsb, "%c", VEC_GZ);
		vep->cb = cb;
	}

	vep->state = VEP_START;
	vep->crc = crc32(0L, Z_NULL, 0);
	vep->crcp = crc32(0L, Z_NULL, 0);

	/*
	 * We must force the GZIP header out as a SKIP string, otherwise
	 * an object starting with <esi:include would have its GZIP header
	 * appear after the included object (e000026.vtc)
	 */
	vep->startup = 1;
	vep->ver_p = "";
	vep->last_mark = SKIP;
	vep_mark_common(vep, vep->ver_p, VERBATIM);
	vep->startup = 0;
}
struct vsb * VEP_Finish(struct vep_state *vep) { ssize_t l, lcb; CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); if (vep->o_pending) vep_mark_common(vep, vep->ver_p, vep->last_mark); if (vep->o_wait > 0) { lcb = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN); vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); } (void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH); AZ(VSB_finish(vep->vsb)); l = VSB_len(vep->vsb); if (vep->esi_found && l > 0) return (vep->vsb); VSB_delete(vep->vsb); return (NULL); }
void VEP_Parse(struct vep_state *vep, const char *p, size_t l) { const char *e; struct vep_match *vm; int i; CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); assert(l > 0); if (vep->startup) { /* * We must force the GZIP header out as a SKIP string, * otherwise an object starting with <esi:include would * have its GZIP header appear after the included object * (e000026.vtc) */ vep->ver_p = ""; vep->last_mark = SKIP; vep_mark_common(vep, vep->ver_p, VERBATIM); vep->startup = 0; AZ(vep->hack_p); vep->hack_p = p; } vep->ver_p = p; e = p + l; while (p < e) { AN(vep->state); i = e - p; if (i > 10) i = 10; Debug("EP %s %d (%.*s) [%.*s]\n", vep->state, vep->remove, vep->tag_i, vep->tag, i, p); assert(p >= vep->ver_p); /****************************************************** * SECTION A */ if (vep->state == VEP_START) { if (FEATURE(FEATURE_ESI_REMOVE_BOM) && *p == '\xeb') { vep->match = vep_match_bom; vep->state = VEP_MATCH; } else vep->state = VEP_BOM; } else if (vep->state == VEP_BOM) { vep_mark_skip(vep, p); if (FEATURE(FEATURE_ESI_DISABLE_XML_CHECK)) vep->state = VEP_NEXTTAG; else vep->state = VEP_TESTXML; } else if (vep->state == VEP_TESTXML) { /* * If the first non-whitespace char is different * from '<' we assume this is not XML. */ while (p < e && vct_islws(*p)) p++; vep_mark_verbatim(vep, p); if (p < e && *p == '<') { p++; vep->state = VEP_STARTTAG; } else if (p < e && *p == '\xeb') { VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "No ESI processing, " "first char not '<' but BOM." " (See feature esi_remove_bom)" ); vep->state = VEP_NOTXML; } else if (p < e) { VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "No ESI processing, " "first char not '<'." 
" (See feature esi_disable_xml_check)" ); vep->state = VEP_NOTXML; } } else if (vep->state == VEP_NOTXML) { /* * This is not recognized as XML, just skip thru * vfp_esi_end() will handle the rest */ p = e; vep_mark_verbatim(vep, p); /****************************************************** * SECTION B */ } else if (vep->state == VEP_NOTMYTAG) { if (FEATURE(FEATURE_ESI_IGNORE_OTHER_ELEMENTS)) { p++; vep->state = VEP_NEXTTAG; } else { vep->tag_i = 0; while (p < e) { if (*p++ == '>') { vep->state = VEP_NEXTTAG; break; } } } if (p == e && !vep->remove) vep_mark_verbatim(vep, p); } else if (vep->state == VEP_NEXTTAG) { /* * Hunt for start of next tag and keep an eye * out for end of EsiCmt if armed. */ vep->emptytag = 0; vep->endtag = 0; vep->attr = NULL; vep->dostuff = NULL; while (p < e && *p != '<') { if (vep->esicmt_p == NULL) { p++; continue; } if (*p != *vep->esicmt_p) { p++; vep->esicmt_p = vep->esicmt; continue; } if (!vep->remove && vep->esicmt_p == vep->esicmt) vep_mark_verbatim(vep, p); p++; if (*++vep->esicmt_p == '\0') { vep->esi_found = 1; vep->esicmt = NULL; vep->esicmt_p = NULL; /* * The end of the esicmt * should not be emitted. 
* But the stuff before should */ vep_mark_skip(vep, p); } } if (p < e) { if (!vep->remove) vep_mark_verbatim(vep, p); assert(*p == '<'); p++; vep->state = VEP_STARTTAG; } else if (vep->esicmt_p == vep->esicmt && !vep->remove) vep_mark_verbatim(vep, p); /****************************************************** * SECTION C */ } else if (vep->state == VEP_STARTTAG) { /* * Start of tag, set up match table */ if (p < e) { if (*p == '/') { vep->endtag = 1; p++; } vep->match = vep_match_starttag; vep->state = VEP_MATCH; } } else if (vep->state == VEP_COMMENT) { /* * We are in a comment, find out if it is an * ESI comment or a regular comment */ if (vep->esicmt == NULL) vep->esicmt_p = vep->esicmt = "esi"; while (p < e) { if (*p != *vep->esicmt_p) { vep->esicmt_p = vep->esicmt = NULL; vep->until_p = vep->until = "-->"; vep->until_s = VEP_NEXTTAG; vep->state = VEP_UNTIL; break; } p++; if (*++vep->esicmt_p != '\0') continue; if (vep->remove) vep_error(vep, "ESI 1.0 Nested <!--esi" " element in <esi:remove>"); vep->esicmt_p = vep->esicmt = "-->"; vep->state = VEP_NEXTTAG; vep_mark_skip(vep, p); break; } } else if (vep->state == VEP_CDATA) { /* * Easy: just look for the end of CDATA */ vep->until_p = vep->until = "]]>"; vep->until_s = VEP_NEXTTAG; vep->state = VEP_UNTIL; } else if (vep->state == VEP_ESITAG) { vep->in_esi_tag = 1; vep->esi_found = 1; vep_mark_skip(vep, p); vep->match = vep_match_esi; vep->state = VEP_MATCH; } else if (vep->state == VEP_ESIINCLUDE) { if (vep->remove) { vep_error(vep, "ESI 1.0 <esi:include> element" " nested in <esi:remove>"); vep->state = VEP_TAGERROR; } else if (vep->endtag) { vep_error(vep, "ESI 1.0 </esi:include> illegal end-tag"); vep->state = VEP_TAGERROR; } else { vep->dostuff = vep_do_include; vep->state = VEP_INTAG; vep->attr = vep_match_attr_include; } } else if (vep->state == VEP_ESIREMOVE) { vep->dostuff = vep_do_remove; vep->state = VEP_INTAG; } else if (vep->state == VEP_ESICOMMENT) { if (vep->remove) { vep_error(vep, "ESI 1.0 
<esi:comment> element" " nested in <esi:remove>"); vep->state = VEP_TAGERROR; } else if (vep->endtag) { vep_error(vep, "ESI 1.0 </esi:comment> illegal end-tag"); vep->state = VEP_TAGERROR; } else { vep->dostuff = vep_do_comment; vep->state = VEP_INTAG; } } else if (vep->state == VEP_ESIBOGON) { vep_error(vep, "ESI 1.0 <esi:bogus> element"); vep->state = VEP_TAGERROR; /****************************************************** * SECTION D */ } else if (vep->state == VEP_INTAG) { vep->tag_i = 0; while (p < e && vct_islws(*p) && !vep->emptytag) { p++; vep->canattr = 1; } if (p < e && *p == '/' && !vep->emptytag) { p++; vep->emptytag = 1; vep->canattr = 0; } if (p < e && *p == '>') { p++; AN(vep->dostuff); vep_mark_skip(vep, p); vep->dostuff(vep, DO_TAG); vep->in_esi_tag = 0; vep->state = VEP_NEXTTAG; } else if (p < e && vep->emptytag) { vep_error(vep, "XML 1.0 '>' does not follow '/' in tag"); vep->state = VEP_TAGERROR; } else if (p < e && vep->canattr && vct_isxmlnamestart(*p)) { vep->state = VEP_ATTR; } else if (p < e) { vep_error(vep, "XML 1.0 Illegal attribute start char"); vep->state = VEP_TAGERROR; } } else if (vep->state == VEP_TAGERROR) { while (p < e && *p != '>') p++; if (p < e) { p++; vep_mark_skip(vep, p); vep->in_esi_tag = 0; vep->state = VEP_NEXTTAG; } /****************************************************** * SECTION E */ } else if (vep->state == VEP_ATTR) { AZ(vep->attr_delim); if (vep->attr == NULL) { p++; AZ(vep->attr_vsb); vep->state = VEP_SKIPATTR; } else { vep->match = vep->attr; vep->state = VEP_MATCH; } } else if (vep->state == VEP_SKIPATTR) { while (p < e && vct_isxmlname(*p)) p++; if (p < e && *p == '=') { p++; vep->state = VEP_ATTRDELIM; } else if (p < e && *p == '>') { vep->state = VEP_INTAG; } else if (p < e && *p == '/') { vep->state = VEP_INTAG; } else if (p < e && vct_issp(*p)) { vep->state = VEP_INTAG; } else if (p < e) { vep_error(vep, "XML 1.0 Illegal attr char"); vep->state = VEP_TAGERROR; } } else if (vep->state == VEP_ATTRGETVAL) { 
vep->attr_vsb = VSB_new_auto(); vep->state = VEP_ATTRDELIM; } else if (vep->state == VEP_ATTRDELIM) { AZ(vep->attr_delim); if (*p == '"' || *p == '\'') { vep->attr_delim = *p++; vep->state = VEP_ATTRVAL; } else if (!vct_issp(*p)) { vep->attr_delim = ' '; vep->state = VEP_ATTRVAL; } else { vep_error(vep, "XML 1.0 Illegal attribute delimiter"); vep->state = VEP_TAGERROR; } } else if (vep->state == VEP_ATTRVAL) { while (p < e && *p != '>' && *p != vep->attr_delim && (vep->attr_delim != ' ' || !vct_issp(*p))) { if (vep->attr_vsb != NULL) VSB_bcat(vep->attr_vsb, p, 1); p++; } if (p < e && *p == '>') { vep_error(vep, "XML 1.0 Missing end attribute delimiter"); vep->state = VEP_TAGERROR; vep->attr_delim = 0; if (vep->attr_vsb != NULL) { AZ(VSB_finish(vep->attr_vsb)); VSB_delete(vep->attr_vsb); vep->attr_vsb = NULL; } } else if (p < e) { vep->attr_delim = 0; p++; vep->state = VEP_INTAG; if (vep->attr_vsb != NULL) { AZ(VSB_finish(vep->attr_vsb)); AN(vep->dostuff); vep->dostuff(vep, DO_ATTR); vep->attr_vsb = NULL; } } /****************************************************** * Utility Section */ } else if (vep->state == VEP_MATCH) { /* * Match against a table */ vm = vep_match(vep, p, e); vep->match_hit = vm; if (vm != NULL) { if (vm->match != NULL) p += strlen(vm->match); vep->state = *vm->state; vep->match = NULL; vep->tag_i = 0; } else { memcpy(vep->tag, p, e - p); vep->tag_i = e - p; vep->state = VEP_MATCHBUF; p = e; } } else if (vep->state == VEP_MATCHBUF) { /* * Match against a table while split over input * sections. 
*/ AN(vep->match); do { if (*p == '>') { for (vm = vep->match; vm->match != NULL; vm++) continue; AZ(vm->match); } else { vep->tag[vep->tag_i++] = *p++; vm = vep_match(vep, vep->tag, vep->tag + vep->tag_i); if (vm && vm->match == NULL) { vep->tag_i--; p--; } } } while (vm == NULL && p < e); vep->match_hit = vm; if (vm == NULL) { assert(p == e); } else { vep->state = *vm->state; vep->match = NULL; } } else if (vep->state == VEP_UNTIL) { /* * Skip until we see magic string */ while (p < e) { if (*p++ != *vep->until_p++) { vep->until_p = vep->until; } else if (*vep->until_p == '\0') { vep->state = vep->until_s; break; } } if (p == e && !vep->remove) vep_mark_verbatim(vep, p); } else { Debug("*** Unknown state %s\n", vep->state); INCOMPL(); } } /* * We must always mark up the storage we got, try to do so * in the most efficient way, in particular with respect to * minimizing and limiting use of pending. */ if (p == vep->ver_p) ; else if (vep->in_esi_tag) vep_mark_skip(vep, p); else if (vep->remove) vep_mark_skip(vep, p); else vep_mark_pending(vep, p); }