// Parse a "<!DOCTYPE ...>" declaration starting at the current input position.
//
// Returns a tuple labelled "doctype" that receives, in order: the result of
// parse_name (), the SYSTEM or PUBLIC identifier when one is present, and one
// entry for each ELEMENT / ATTLIST / processing-instruction / comment item
// found inside an optional "[ ... ]" internal subset. A closing '>' is
// consumed when present.
//
// NOTE(review): NOTATION declarations are appended to the parser-wide
// sequence `a`, not to the doctype tuple, and ATTLIST declarations are read
// with parse_cdata (). Both look unusual next to the other branches; they are
// preserved as-is here -- confirm whether they are intentional.
tree
xml_html_parser::parse_doctype () {
  s += 9; // step over the 9 characters of "<!DOCTYPE"
  tree doctype= tuple ("doctype");

  skip_space ();
  doctype << parse_name ();
  skip_space ();

  // optional external identifier
  if (test (s, "SYSTEM")) doctype << parse_system ();
  else if (test (s, "PUBLIC")) doctype << parse_public ();
  skip_space ();

  // optional internal subset: "[" declarations... "]"
  if (test (s, "[")) {
    s += 1;
    while (s) {
      skip_space ();

      if (test (s, "]")) {
        s += 1;
        break;
      }
      if (test (s, "<!ELEMENT")) {
        doctype << parse_element ();
        continue;
      }
      if (test (s, "<!ATTLIST")) {
        doctype << parse_cdata ();
        continue;
      }
      if (test (s, "<!ENTITY")) {
        // entity declarations contribute nothing to the returned tuple
        parse_entity_decl ();
        continue;
      }
      if (test (s, "<!NOTATION")) {
        a << parse_notation ();
        continue;
      }
      if (test (s, "<?")) {
        doctype << parse_pi ();
        continue;
      }
      if (test (s, "<!--")) {
        doctype << parse_comment ();
        continue;
      }
      if (s[0] == '&' || s[0] == '%') {
        // entity reference: consumed, result discarded
        (void) parse_entity ();
        continue;
      }

      // anything unrecognised is skipped one character at a time
      s += 1;
    }
  }

  skip_space ();
  if (test (s, ">")) s += 1;
  return doctype;
}
virtual bool parse(const char * ptr) { pfc::string8 name, value; m_autoprobe = true; bool head_suffix_found = false; bool body_suffix_found = false; while (parse_entity(ptr, name, value)) { for (;;) { if (!head_suffix_found && (!pfc::stringCompareCaseInsensitive(name, "head-suffix") || !pfc::stringCompareCaseInsensitive(name, "head"))) { m_head.suffix = value; m_autoprobe = false; head_suffix_found = true; break; } if (!body_suffix_found && (!pfc::stringCompareCaseInsensitive(name, "body-suffix") || !pfc::stringCompareCaseInsensitive(name, "body"))) { m_body.suffix = value; m_autoprobe = false; body_suffix_found = true; break; } break; } if (body_suffix_found && head_suffix_found) { break; } /* if (!pfc::stringCompareCaseInsensitive(name, "head-suffix")) { m_head.suffix = value; m_autoprobe = false; } else if (!pfc::stringCompareCaseInsensitive(name, "body-suffix")) { m_body.suffix = value; m_autoprobe = false; } else { // ignore unknown entities //return false; }*/ } if (!m_autoprobe && m_head.suffix == m_body.suffix) return false; return true; }
void xml_html_parser::parse () { string r; while (s) { if (s[0] == '<') { if (N(r) != 0) { a << tree (r); } if (test (s, "</")) a << parse_closing (); else if (test (s, "<?")) a << parse_pi (); else if (test (s, "<!--")) a << parse_comment (); else if (test (s, "<![CDATA[")) a << parse_cdata (); else if (test (s, "<!DOCTYPE")) a << parse_doctype (); else if (test (s, "<!")) a << parse_misc (); else a << parse_opening (); r= ""; } else if (s[0] == '&') r << parse_entity (); else r << s->read (1); } if (N(r) != 0) a << tree (r); }
/* * Main entrypoint */ int main(int argc, char **argv) { int curarg; unsigned char input_map[256]; unsigned char output_map[256]; unsigned char input_map_set[256]; unsigned char output_map_set[256]; unsigned char *p; int i; osfildef *fp; char *infile; char *outfile; int linenum; static char sig[] = CMAP_SIG_S100; int strict_mode = FALSE; char id[5]; char ldesc[CMAP_LDESC_MAX_LEN + 1]; size_t len; unsigned char lenbuf[2]; char *sys_info; entity_map_t *entity_first; entity_map_t *entity_last; /* no parameters have been specified yet */ memset(id, 0, sizeof(id)); ldesc[0] = '\0'; sys_info = 0; /* we have no entities in our entity mapping list yet */ entity_first = entity_last = 0; /* scan options */ for (curarg = 1 ; curarg < argc && argv[curarg][0] == '-' ; ++curarg) { if (!stricmp(argv[curarg], "-strict")) { /* they want extra warnings */ strict_mode = TRUE; } else { /* consume all remaining options so we get a usage message */ curarg = argc; break; } } /* check for required arguments */ if (curarg + 1 >= argc) { printf("usage: mkchrtab [options] <source> <dest>\n" " <source> is the input file\n" " <dest> is the output file\n" "Options:\n" " -strict warn if any codes 128-255 are unassigned\n"); #if 0 /* * The information about what goes in the file made the message way too * long, so this has been removed. Users will want to the documentation * instead of the usage message for information this detailed, so it * didn't seem useful to keep it in here. 
*/ printf("\n" "The source file contains one entry per line, as follows:\n" "\n" "Set the internal character set identifier, which can be up " "to four letters long\n" "(note that the mapping file MUST contain an ID entry):\n" " ID = id\n" "\n"); printf("Set the internal character set's full display name:\n" " LDESC = full name of character set\n" "\n" "Set system-dependent extra information (the meaning varies " "by system):\n" " EXTRA_SYSTEM_INFO = info-string\n" "\n" "Set the native default character:\n" " NATIVE_DEFAULT = charval\n" "Set the internal default character:\n" " INTERNAL_DEFAULT = charval\n"); printf("Load Unicode mapping files:\n" " UNICODE NATIVE=native-mapping INTERNAL=internal-mapping\n" "\n" "Reversibly map a native character code to an internal code:\n" " native <-> internal\n" "\n" "Map a native code to an internal code, and map the internal " "code back\nto a different native code:\n" " native -> internal -> native\n" "\n" "Map a native code to an internal code, where the internal " "code is already\nmapped to a native code by a previous line:\n" " native -> internal\n" "\n"); printf("Map an internal code to a native code, where the native " "code is already\nmapped to an internal code by a previous " "line:\n" " native <- internal\n" "\n" "Map an HTML entity name to a native code or string:\n" " &entity = internal-code [internal-code ...]\n" "\n" "Numbers can be specified in decimal (default), octal (by " "prefixing the number\nwith a zero, as in '037'), or hex (by " "prefixing the number with '0x', as in\n'0xb2'). 
A number " "can also be entered as a character by enclosing the\n" "character in single quotes.\n" "\n" "Blank lines and lines starting with a pound sign ('#') are " "ignored.\n"); #endif /* 0 */ os_term(OSEXFAIL); } /* get the input and output filenames */ infile = argv[curarg]; outfile = argv[curarg + 1]; /* * initialize the tables - by default, a character code in one * character set maps to the same code in the other character set */ for (p = input_map, i = 0 ; i < sizeof(input_map)/sizeof(input_map[0]) ; ++i, ++p) *p = (unsigned char)i; for (p = output_map, i = 0 ; i < sizeof(output_map)/sizeof(output_map[0]) ; ++i, ++p) *p = (unsigned char)i; /* * initialize the "set" flags all to false, since we haven't set any * of the values yet -- we'll use these flags to detect when the * user attempts to set the same value more than once, so that we * can issue a warning (multiple mappings are almost certainly in * error) */ for (i = 0 ; i < sizeof(input_map_set)/sizeof(input_map_set[0]) ; ++i) input_map_set[i] = output_map_set[i] = FALSE; /* open the input file */ fp = osfoprs(infile, OSFTTEXT); if (fp == 0) { printf("error: unable to open input file \"%s\"\n", infile); os_term(OSEXFAIL); } /* parse the input file */ for (linenum = 1 ; ; ++linenum) { char buf[256]; char *p; unsigned int n1, n2, n3; int set_input; int set_output; /* presume we're going to set both values */ set_input = set_output = TRUE; /* read the next line */ if (osfgets(buf, sizeof(buf), fp) == 0) break; /* scan off leading spaces */ for (p = buf ; isspace(*p) ; ++p) ; /* if this line is blank, or starts with a '#', ignore it */ if (*p == '\0' || *p == '\n' || *p == '\r' || *p == '#') continue; /* check for special directives */ if (isalpha(*p) || *p == '_') { char *sp; char *val; size_t vallen; size_t idlen; /* find the end of the directive name */ for (sp = p ; isalpha(*sp) || *sp == '_' ; ++sp) ; idlen = sp - p; /* find the equals sign, if present */ for (val = sp ; isspace(*val) ; ++val) ; if 
(*val == '=') { /* skip the '=' and any spaces that follow */ for (++val ; isspace(*val) ; ++val) ; /* find the end of the value */ for (sp = val ; *sp != '\n' && *sp != '\r' && *sp != '\0' ; ++sp) ; /* note its length */ vallen = sp - val; } else { /* there's no value */ val = 0; } /* see what we have */ if (id_matches(p, idlen, "id")) { /* this directive requires a value */ if (val == 0) goto val_required; /* ID's can never be more than four characters long */ if (vallen > 4) { printf("%s: line %d: ID too long - no more than four " "characters are allowed\n", infile, linenum); } else { /* remember the ID */ memcpy(id, val, vallen); id[vallen] = '\0'; } } else if (id_matches(p, idlen, "ldesc")) { /* this directive requires a value */ if (val == 0) goto val_required; /* make sure it fits */ if (vallen > sizeof(ldesc) - 1) { printf("%s: line %d: LDESC too long - no more than %u " "characters are allowed\n", infile, linenum, sizeof(ldesc) - 1); } else { /* remember the ldesc */ memcpy(ldesc, val, vallen); ldesc[vallen] = '\0'; } } else if (id_matches(p, idlen, "extra_system_info")) { /* this directive requires a value */ if (val == 0) goto val_required; /* allocate space for it */ sys_info = (char *)malloc(vallen + 1); memcpy(sys_info, val, vallen); sys_info[vallen] = '\0'; } else if (id_matches(p, idlen, "native_default")) { unsigned int nval; int i; /* this directive requires a value */ if (val == 0) goto val_required; /* parse the character value */ if (read_number(&nval, &val, infile, linenum, TRUE)) continue; /* apply the default */ for (i = 128 ; i < 256 ; ++i) { /* set the default only if we haven't mapped this one */ if (!output_map_set[i]) output_map[i] = nval; } } else if (id_matches(p, idlen, "internal_default")) { unsigned int nval; int i; /* this directive requires a value */ if (val == 0) goto val_required; /* parse the character value */ if (read_number(&nval, &val, infile, linenum, TRUE)) continue; /* apply the default */ for (i = 128 ; i < 256 ; ++i) 
{ /* apply the default only if we haven't set this one */ if (!input_map_set[i]) input_map[i] = nval; } } else if (id_matches(p, idlen, "unicode")) { /* skip the 'unicode' string and any intervening spaces */ for (p += idlen ; isspace(*p) ; ++p) ; /* parse the unicode files */ parse_unicode_files(p, strlen(p), infile, linenum, input_map, input_map_set, output_map, output_map_set, &entity_first, &entity_last); } else { /* unknown directive */ printf("%s: line %d: invalid directive '%.*s'\n", infile, linenum, idlen, p); } /* done processing this line */ continue; /* come here if the directive needs a value and there isn't one */ val_required: printf("%s: line %d: '=' required with directive '%.*s'\n", infile, linenum, idlen, p); continue; } /* check for an entity name */ if (*p == '&') { entity_map_t *mapp; /* skip the '&' */ ++p; /* * parse the entity - if it succeeds, link the resulting * mapping entry into our list */ mapp = parse_entity(p, infile, linenum); if (mapp != 0) { if (entity_last == 0) entity_first = mapp; else entity_last->nxt = mapp; entity_last = mapp; } /* done */ continue; } /* read the first number */ if (read_number(&n1, &p, infile, linenum, TRUE)) continue; /* determine which operator we have */ if (*p == '<') { /* make sure it's "<->" or "<-" */ if (*(p+1) == '-' && *(p+2) != '>') { /* skip the operator */ p += 2; /* * This is a "from" translation - it only affects the * output mapping from the internal character set to the * native character set. Read the second number. There * is no third number, since we don't want to change the * input mapping. */ if (read_number(&n2, &p, infile, linenum, TRUE)) continue; /* * The forward translation is not affected; set only the * output translation. Note that the first number was * the output (native) value for the internal index in * the second number, so move the first value to n3. 
*/ n3 = n1; set_input = FALSE; } else if (*(p+1) == '-' && *(p+2) == '>') { /* skip it */ p += 3; /* * this is a reversible translation, so we only need one * more number - the third number is implicitly the same * as the first */ n3 = n1; if (read_number(&n2, &p, infile, linenum, TRUE)) continue; } else { printf("%s: line %d: invalid operator - expected <->\n", infile, linenum); continue; } } else if (*p == '-') { /* make sure it's "->" */ if (*(p+1) != '>') { printf("%s: line %d: invalid operator - expected ->\n", infile, linenum); continue; } /* skip it */ p += 2; /* get the next number */ if (read_number(&n2, &p, infile, linenum, TRUE)) continue; /* * we may or may not have a third number - if we have * another -> operator, read the third number; if we don't, * the reverse translation is not affected by this entry */ if (*p == '-') { /* make sure it's "->" */ if (*(p+1) != '>') { printf("%s: line %d: invalid operator - expected ->\n", infile, linenum); continue; } /* skip it */ p += 2; /* read the third number */ if (read_number(&n3, &p, infile, linenum, TRUE)) continue; } else { /* * There's no third number - the reverse translation is * not affected by this line. 
*/ set_output = FALSE; } } else { printf("%s: line %d: invalid operator - expected " "-> or <-> or <-\n", infile, linenum); continue; } /* make sure we're at the end of the line, and warn if not */ if (*p != '\0' && *p != '\n' && *p != '\r' && *p != '#') printf("%s: line %d: extra characters at end of line ignored\n", infile, linenum); /* set the input mapping, if necessary */ if (set_input) { /* warn the user if this value has already been set before */ if (input_map_set[n1]) printf("%s: line %d: warning - native character %u has " "already been\n mapped to internal value %u\n", infile, linenum, n1, input_map[n1]); /* set it */ input_map[n1] = n2; /* note that it's been set */ input_map_set[n1] = TRUE; } /* set the output mapping, if necessary */ if (set_output) { /* warn the user if this value has already been set before */ if (output_map_set[n2]) printf("%s: line %d: warning - internal character %u has " "already been\n mapped to native value %u\n", infile, linenum, n2, input_map[n2]); /* set it */ output_map[n2] = n3; /* note that it's been set */ output_map_set[n2] = TRUE; } } /* we're done with the input file */ osfcls(fp); /* * It's an error if we didn't get an ID or LDESC */ if (id[0] == '\0') { printf("Error: No ID was specified. An ID is required.\n"); os_term(OSEXFAIL); } else if (ldesc[0] == '\0') { printf("Error: No LDESC was specified. 
An LDESC is required.\n"); os_term(OSEXFAIL); } /* open the output file */ fp = osfopwb(outfile, OSFTCMAP); if (fp == 0) { printf("error: unable to open output file \"%s\"\n", outfile); os_term(OSEXFAIL); } /* write our signature */ if (osfwb(fp, sig, sizeof(sig))) printf("error writing signature to output file\n"); /* write the ID and LDESC */ len = strlen(ldesc) + 1; oswp2(lenbuf, len); if (osfwb(fp, id, 4) || osfwb(fp, lenbuf, 2) || osfwb(fp, ldesc, len)) printf("error writing ID information to output file\n"); /* write the mapping tables */ if (osfwb(fp, input_map, sizeof(input_map)) || osfwb(fp, output_map, sizeof(output_map))) printf("error writing character maps to output file\n"); /* write the extra system information if present */ if (sys_info != 0) { /* write it out, with the "SYSI" flag so we know it's there */ len = strlen(sys_info) + 1; oswp2(lenbuf, len); if (osfwb(fp, "SYSI", 4) || osfwb(fp, lenbuf, 2) || osfwb(fp, sys_info, len)) printf("error writing EXTRA_SYSTEM_INFO to output file\n"); /* we're done with the allocated buffer now */ free(sys_info); } /* * Write the entity mapping list, if we have any entities */ if (entity_first != 0) { entity_map_t *entp; entity_map_t *next_entity; char lenbuf[2]; char cvalbuf[2]; /* write out the entity list header */ if (osfwb(fp, "ENTY", 4)) printf("error writing entity marker to output file\n"); /* run through the list, writing out each entry */ for (entp = entity_first ; entp != 0 ; entp = next_entity) { /* write out this entity */ oswp2(lenbuf, entp->exp_len); oswp2(cvalbuf, entp->html_char); if (osfwb(fp, lenbuf, 2) || osfwb(fp, cvalbuf, 2) || osfwb(fp, entp->expansion, entp->exp_len)) { printf("error writing entity mapping to output file\n"); break; } /* remember the next entity before we delete this one */ next_entity = entp->nxt; /* we're done with this entity, so we can delete it now */ free(entp); } /* * write out the end marker, which is just a length marker and * character marker of zero */ 
oswp2(lenbuf, 0); oswp2(cvalbuf, 0); if (osfwb(fp, lenbuf, 2) || osfwb(fp, cvalbuf, 2)) printf("error writing entity list end marker to output file\n"); } /* write the end-of-file marker */ if (osfwb(fp, "$EOF", 4)) printf("error writing end-of-file marker to output file\n"); /* done with the output file */ osfcls(fp); /* if we're in strict mode, check for unassigned mappings */ if (strict_mode) { int in_cnt, out_cnt; int cnt; /* count unassigned characters */ for (i = 128, in_cnt = out_cnt = 0 ; i < 256 ; ++i) { if (!input_map_set[i]) ++in_cnt; if (!output_map_set[i]) ++out_cnt; } /* if we have any unassigned native characters, list them */ if (in_cnt != 0) { printf("\nUnassigned native -> internal mappings:\n "); for (i = 128, cnt = 0 ; i < 256 ; ++i) { if (!input_map_set[i]) { /* go to a new line if necessary */ if (cnt >= 16) { printf("\n "); cnt = 0; } /* display this item */ printf("%3d ", i); ++cnt; } } printf("\n"); } /* list unassigned internal characters */ if (out_cnt != 0) { printf("\nUnassigned internal -> native mappings:\n "); for (i = 128, cnt = 0 ; i < 256 ; ++i) { if (!output_map_set[i]) { /* go to a new line if necessary */ if (cnt >= 16) { printf("\n "); cnt = 0; } /* display this item */ printf("%3d ", i); ++cnt; } } printf("\n"); } } /* success */ os_term(OSEXSUCC); return OSEXSUCC; }
/*
 * Parse the contents of one tag and update the box tree accordingly.
 *
 * 'buf' holds the tag text; xml_parse_internal() passes everything that
 * was accumulated between '<' and '>', null terminated.
 *
 * - Tags starting with '!' or '?' are ignored.
 * - A leading '/' marks a closing tag; an empty closing tag closes the
 *   current box whatever its tag is.
 * - In HTML mode, <style> and <script> switch to the "pretag" state
 *   without creating a box; in DocBook mode <programlisting> does the
 *   same after its box has been created.
 * - Otherwise the attributes are parsed, a new box is created as a child
 *   of the current box (or as the root box) and becomes the current box.
 *   The box is closed again immediately if the tag text ends with '/' or,
 *   with HTML syntax, if it is one of the listed self-closing tags.
 *
 * Returns the next parser state: XML_STATE_PRETAG when the element
 * content must be collected verbatim, XML_STATE_TEXT otherwise.
 */
static int parse_tag(XMLState *s, const char *buf)
{
    char tag[256], *q, eot;
    /* strlen() result: must not be narrowed to char, otherwise the
       self-closing test below looks at the wrong byte for long tags */
    size_t len;
    char attr_name[256];
    char value[2048];
    const char *p;
    CSSIdent css_tag;
    CSSBox *box, *box1;
    CSSAttribute *first_attr, **pattr, *attr;

    p = buf;

    /* ignore XML commands */
    if (p[0] == '!' || p[0] == '?')
        return XML_STATE_TEXT;

    /* end of tag check */
    eot = 0;
    if (*p == '/') {
        p++;
        eot = 1;
    }

    /* parse the tag name */
    get_str(&p, tag, sizeof(tag), "/");
    if (tag[0] == '\0') {
        /* special closing tag */
        if (eot) {
            css_tag = CSS_ID_NIL;
            goto end_of_tag;
        } else {
            xml_error(s, "invalid null tag");
            return XML_STATE_TEXT;
        }
    }
    if (s->ignore_case)
        css_strtolower(tag, sizeof(tag));
    css_tag = css_new_ident(tag);

    /* XXX: should test html_syntax, but need more patches */
    if (s->is_html && (css_tag == CSS_ID_style ||
                       css_tag == CSS_ID_script))
        goto pretag;
    if (eot)
        goto end_of_tag;

    /* parse attributes */
    first_attr = NULL;
    pattr = &first_attr;
    for (;;) {
        skip_spaces(&p);
        if (*p == '\0' ||
            *p == '/')
            break;
        get_str(&p, attr_name, sizeof(attr_name), "=/");
        if (s->ignore_case)
            css_strtolower(attr_name, sizeof(attr_name));
        if (*p == '=') {
            int och, ch;
            p++;
            skip_spaces(&p);
            och = *p;
            /* in html, we can put non string values */
            if (och != '\'' && och != '\"') {
                if (!s->html_syntax)
                    xml_error(s, "string expected for attribute '%s'",
                              attr_name);
                q = value;
                while (*p != '\0' && !strchr(" \t\n\r<>", *p)) {
                    ch = parse_entity(&p);
                    /* characters beyond the buffer size are dropped */
                    if ((q - value) < (int)sizeof(value) - 1)
                        *q++ = ch;
                }
                *q = '\0';
            } else {
                p++;
                q = value;
                while (*p != och && *p != '\0' && *p != '<') {
                    ch = parse_entity(&p);
                    /* characters beyond the buffer size are dropped */
                    if ((q - value) < (int)sizeof(value) - 1)
                        *q++ = ch;
                }
                *q = '\0';
                if (*p != och) {
                    xml_error(s, "malformed string in attribute '%s'",
                              attr_name);
                } else {
                    p++;
                }
            }
        } else {
            /* attribute without a value */
            value[0] = '\0';
        }
        /* append the attribute at the tail of the list */
        attr = box_new_attr(css_new_ident(attr_name), value);
        if (attr) {
            *pattr = attr;
            pattr = &attr->next;
        }
    }

    /* close some tags (correct HTML mistakes) */
    if (s->html_syntax) {
        CSSBox *box1;
        const HTMLClosedTags *ct;
        ct = html_closed_tags;
        for (;;) {
            if (!ct->tag)
                break;
            if (css_tag == ct->tag) {
                /* walk up while the open boxes are in the list of tags
                   that this tag implicitly closes */
                box1 = s->box;
                while (box1 != NULL &&
                       css_get_enum(css_ident_str(box1->tag),
                                    ct->tag_closed) >= 0) {
                    html_eval_tag(s, box1);
                    box1 = box1->parent;
                }
                if (box1) {
                    s->box = box1;
                }
                break;
            }
            ct++;
        }
    }

    /* create the new box and add it */
    box = css_new_box(css_tag, NULL);
    box->attrs = first_attr;
    if (!s->box) {
        s->root_box = box;
    } else {
        css_make_child_box(s->box);
        css_add_box(s->box, box);
    }
    s->box = box;

    if ((s->flags & XML_DOCBOOK) &&
        css_tag == CSS_ID_programlisting) {
    pretag:
        pstrcpy(s->pretag, sizeof(s->pretag), tag);
        s->pretaglen = strlen(s->pretag);
        return XML_STATE_PRETAG;
    }

    len = strlen(buf);
    /* end of tag. If html, check also some common mistakes. FORM is
       considered as self closing to avoid any content problems */
    if ((len > 0 && buf[len - 1] == '/') ||
        (s->html_syntax && (css_tag == CSS_ID_br ||
                            css_tag == CSS_ID_hr ||
                            css_tag == CSS_ID_meta ||
                            css_tag == CSS_ID_link ||
                            css_tag == CSS_ID_form ||
                            css_tag == CSS_ID_base ||
                            css_tag == CSS_ID_input ||
                            css_tag == CSS_ID_basefont ||
                            css_tag == CSS_ID_img))) {
    end_of_tag:
        box1 = s->box;
        if (box1) {
            if (s->html_syntax) {
                if (css_tag != CSS_ID_NIL) {
                    /* close all non matching tags */
                    while (box1 != NULL && box1->tag != css_tag) {
                        html_eval_tag(s, box1);
                        box1 = box1->parent;
                    }
                }
                if (!box1) {
                    if (css_tag != CSS_ID_form)
                        xml_error(s, "unmatched closing tag </%s>",
                                  css_ident_str(css_tag));
                } else {
                    html_eval_tag(s, box1);
                    s->box = box1->parent;
                }
            } else {
                if (css_tag != CSS_ID_NIL && box1->tag != css_tag) {
                    xml_error(s, "unmatched closing tag </%s> for <%s>",
                              css_ident_str(css_tag),
                              css_ident_str(box1->tag));
                } else {
                    if (s->is_html)
                        html_eval_tag(s, box1);
                    s->box = box1->parent;
                }
            }
        }
    }
    return XML_STATE_TEXT;
}
/*
 * Feed a chunk of input to the XML/HTML state machine held in 's'.
 *
 * Input comes from one of two places:
 *  - 'buf_start' non NULL: 'buf_len' bytes of memory, decoded with
 *    charset_decode();
 *  - 'buf_start' NULL: characters are read from edit buffer 'b' with
 *    eb_nextc(), from 'offset_start' up to 'offset_start + buf_len'.
 *
 * The parser state lives in 's->state' and the pending tag or text in
 * 's->str', so they carry over from one call to the next.
 *
 * Returns -1 if the abort callback (polled at each newline) asks to
 * stop, otherwise the difference between the final read pointer and
 * 'buf_start'.
 */
static int xml_parse_internal(XMLState *s, const char *buf_start, int buf_len,
                              EditBuffer *b, int offset_start)
{
    const char *cur, *limit;
    int ch, ret;
    int pos, pos_end;      /* edit buffer read position and its bound */
    int char_start;        /* buffer offset of the char just read */
    int text_start;        /* buffer offset where the current text began */

    cur = buf_start;
    limit = cur + buf_len;
    pos = offset_start;
    pos_end = offset_start + buf_len;
    char_start = 0;        /* only meaningful in edit buffer mode */
    text_start = 0;        /* set when a text run starts */

    for (;;) {
        /* fetch the next character from whichever source is active */
        if (cur) {
            if (cur >= limit)
                break;
            ch = charset_decode(&s->charset_state, &cur);
        } else {
            if (pos >= pos_end)
                break;
            char_start = pos;
            ch = eb_nextc(b, pos, &pos);
        }

        /* newline: poll the abort callback, then count the line (the
           line number is used to signal errors) */
        if (ch == '\n') {
            if (s->abort_func(s->abort_opaque))
                return -1;
            s->line_num++;
        }

        switch (s->state) {
        case XML_STATE_TAG:
            if (ch != '>') {
                strbuf_addch(&s->str, ch);
                /* "!--" right after '<' opens a comment */
                if (s->str.size == 3 &&
                    s->str.buf[0] == '!' &&
                    s->str.buf[1] == '-' &&
                    s->str.buf[2] == '-') {
                    s->state = XML_STATE_COMMENT;
                }
                break;
            }
            /* complete tag: terminate it and hand it to parse_tag() */
            strbuf_addch(&s->str, '\0');
            ret = parse_tag(s, (char *)s->str.buf);
            strbuf_reset(&s->str);
            if (ret == XML_STATE_PRETAG)
                s->state = XML_STATE_PRETAG;
            else
                s->state = XML_STATE_TEXT;
            text_start = pos;
            break;

        case XML_STATE_TEXT:
            if (ch == '<') {
                /* XXX: not strictly correct with comments : should
                   not flush if comment */
                if (cur) {
                    strbuf_addch(&s->str, '\0');
                    flush_text(s, (char *)s->str.buf);
                    strbuf_reset(&s->str);
                } else {
                    flush_text_buffer(s, text_start, char_start);
                }
                s->state = XML_STATE_TAG;
            } else if (cur) {
                /* memory mode only: accumulate text, expanding entities */
                if (ch == '&') {
                    /* step back onto the '&' so parse_entity() sees it */
                    cur--;
                    ch = parse_entity(&cur);
                }
                strbuf_addch(&s->str, ch);
            }
            break;

        case XML_STATE_COMMENT:
            /* inside a comment: look for the first '-' of "-->" */
            if (ch == '-')
                s->state = XML_STATE_COMMENT1;
            break;

        case XML_STATE_COMMENT1:
            /* one '-' seen */
            if (ch == '-')
                s->state = XML_STATE_COMMENT2;
            else
                s->state = XML_STATE_COMMENT;
            break;

        case XML_STATE_COMMENT2:
            /* "--" seen: '>' ends the comment, more '-' keeps waiting */
            if (ch == '>') {
                strbuf_reset(&s->str);
                s->state = XML_STATE_TEXT;
                text_start = pos;
            } else if (ch != '-') {
                s->state = XML_STATE_COMMENT;
            }
            break;

        case XML_STATE_PRETAG:
            {
                int close_len, tail;

                strbuf_addch(&s->str, ch);
                /* "</" plus the pretag name */
                close_len = s->pretaglen + 2;
                tail = s->str.size - close_len;
                if (tail >= 0 &&
                    s->str.buf[tail] == '<' &&
                    s->str.buf[tail + 1] == '/' &&
                    !xml_tagcmp((char *)s->str.buf + tail + 2, s->pretag)) {
                    /* closing tag found: cut the content before it */
                    s->str.buf[tail] = '\0';

                    if (!xml_tagcmp(s->pretag, "style")) {
                        if (s->style_sheet) {
                            CSSParseState css_state, *css = &css_state;
                            css->ptr = (char *)s->str.buf;
                            css->line_num = s->line_num; /* XXX: incorrect */
                            css->filename = s->filename;
                            css->ignore_case = s->ignore_case;
                            css_parse_style_sheet(s->style_sheet, css);
                        }
                    } else if (!xml_tagcmp(s->pretag, "script")) {
                        /* XXX: handle script */
                    } else {
                        /* just add the content */
                        if (cur) {
                            flush_text(s, (char *)s->str.buf);
                        } else {
                            /* XXX: would be incorrect if non ascii chars */
                            flush_text_buffer(s, text_start,
                                              pos - close_len);
                        }
                        strbuf_reset(&s->str);
                        if (s->box)
                            s->box = s->box->parent;
                    }
                    s->state = XML_STATE_WAIT_EOT;
                }
            }
            break;

        case XML_STATE_WAIT_EOT:
            /* wait end of tag */
            if (ch == '>') {
                strbuf_reset(&s->str);
                s->state = XML_STATE_TEXT;
                text_start = pos;
            }
            break;
        }
    }
    return cur - buf_start;
}
virtual bool parse(const char * ptr) { pfc::string8 name, value; m_loopstart = 0; m_looplength = 0; m_by_meta = true; bool loopstart_found = false; bool looplength_found = false; bool loopend_found = false; t_uint64 m_loopend = 0; // [0-9]+ [0-9]+ if (pfc::string_find_first(ptr, '=') == ~0) { char tmp; t_size n = 0; while (tmp = *ptr, tmp && !pfc::char_is_ascii_alphanumeric(tmp)) ++ptr; while (tmp = ptr[n], pfc::char_is_ascii_alphanumeric(tmp)) n++; if (!tmp) return true; m_loopstart = pfc::atoui64_ex(ptr, n); ptr += n; while (tmp = *ptr, tmp && !pfc::char_is_ascii_alphanumeric(tmp)) ++ptr; if (!tmp) return true; n = 0; while (tmp = ptr[n], pfc::char_is_ascii_alphanumeric(tmp)) n++; m_looplength = pfc::atoui64_ex(ptr, n); if (m_looplength) m_by_meta = false; return true; } while (parse_entity(ptr, name, value)) { for (;;) { if (!loopstart_found && (!pfc::stringCompareCaseInsensitive(name, "LOOPSTART") || !pfc::stringCompareCaseInsensitive(name, "START"))) { m_loopstart = pfc::atoui64_ex(value, ~0); loopstart_found = true; break; } if (!looplength_found && (!pfc::stringCompareCaseInsensitive(name, "LOOPLENGTH") || !pfc::stringCompareCaseInsensitive(name, "LENGTH"))) { m_looplength = pfc::atoui64_ex(value, ~0); if (m_looplength) looplength_found = true; break; } if (!loopend_found && (!pfc::stringCompareCaseInsensitive(name, "LOOPEND") || !pfc::stringCompareCaseInsensitive(name, "END"))) { m_loopend = pfc::atoui64_ex(value, ~0); if (m_loopend > m_loopstart) loopend_found = true; break; } break; } if (loopstart_found) { if (looplength_found) { m_by_meta = false; break; } if (loopend_found) { if (m_loopend > m_loopstart) { m_looplength = m_loopend - m_loopstart; m_by_meta = false; break; } else { loopend_found = false; } } } } return true; }