Exemple #1
0
// Parse a "<!DOCTYPE ...>" declaration at the current input position and
// return it as a tuple whose first child is "doctype", followed by the
// document type name, an optional SYSTEM/PUBLIC identifier, and the
// declarations collected from the optional internal subset ("[ ... ]").
// Entity declarations and entity references found in the internal subset
// are processed for their side effects only; nothing is appended for them.
tree
xml_html_parser::parse_doctype () {
  s += 9; // skip "<!DOCTYPE" (9 characters), already matched by the caller
  tree dt= tuple ("doctype");
  skip_space ();
  dt << parse_name ();
  skip_space ();
  if (test (s, "SYSTEM")) dt << parse_system ();
  else if (test (s, "PUBLIC")) dt << parse_public ();
  skip_space ();

  // optional internal subset
  if (test (s, "[")) {
    s += 1;
    while (s) {
      skip_space ();
      if (test (s, "]")) { s += 1; break; }
      else if (test (s, "<!ELEMENT")) dt << parse_element ();
      // NOTE(review): "<!ATTLIST" is dispatched to parse_cdata (), which
      // looks like it was meant to be a dedicated ATTLIST parser; left
      // unchanged because no such parser is visible here -- confirm.
      else if (test (s, "<!ATTLIST")) dt << parse_cdata ();
      else if (test (s, "<!ENTITY")) parse_entity_decl ();
      // notations belong to the doctype tuple, like every other declaration
      // of the internal subset (previously appended to the document
      // accumulator 'a' by mistake)
      else if (test (s, "<!NOTATION")) dt << parse_notation ();
      else if (test (s, "<?")) dt << parse_pi ();
      else if (test (s, "<!--")) dt << parse_comment ();
      else if (s[0] == '&' || s[0] == '%') (void) parse_entity ();
      else s += 1; // unrecognized character: skip it so the loop advances
    }
  }

  // consume the closing '>' when present
  skip_space ();
  if (test (s, ">")) s += 1;
  return dt;
}
Exemple #2
0
	// Reads name/value entities from ptr through parse_entity() and records
	// the first head suffix ("head-suffix" or "head") and the first body
	// suffix ("body-suffix" or "body"), compared case-insensitively.  Any
	// other entity is ignored.  Seeing either suffix turns auto-probing off.
	// Returns false only when auto-probing was turned off and both suffixes
	// compare equal; returns true otherwise.
	virtual bool parse(const char * ptr) {
		pfc::string8 name, value;
		m_autoprobe = true;
		bool have_head = false;
		bool have_body = false;

		while (parse_entity(ptr, name, value)) {
			if (!have_head
				&& (pfc::stringCompareCaseInsensitive(name, "head-suffix") == 0
					|| pfc::stringCompareCaseInsensitive(name, "head") == 0)) {
				// first head suffix wins; later ones are not taken as head
				m_head.suffix = value;
				m_autoprobe = false;
				have_head = true;
			} else if (!have_body
				&& (pfc::stringCompareCaseInsensitive(name, "body-suffix") == 0
					|| pfc::stringCompareCaseInsensitive(name, "body") == 0)) {
				// first body suffix wins
				m_body.suffix = value;
				m_autoprobe = false;
				have_body = true;
			}
			// unknown entities are deliberately ignored

			// stop scanning once both suffixes are known
			if (have_head && have_body)
				break;
		}

		// explicit suffixes must differ to be usable
		return m_autoprobe || !(m_head.suffix == m_body.suffix);
	}
Exemple #3
0
void
xml_html_parser::parse () {
  string r;
  while (s) {
    if (s[0] == '<') {
      if (N(r) != 0) { a << tree (r); }
      if (test (s, "</")) a << parse_closing ();
      else if (test (s, "<?")) a << parse_pi ();
      else if (test (s, "<!--")) a << parse_comment ();
      else if (test (s, "<![CDATA[")) a << parse_cdata ();
      else if (test (s, "<!DOCTYPE")) a << parse_doctype ();
      else if (test (s, "<!")) a << parse_misc ();
      else a << parse_opening ();
      r= "";
    }
    else if (s[0] == '&') r << parse_entity ();
    else r << s->read (1);
  }
  if (N(r) != 0) a << tree (r);
}
Exemple #4
0
/*
 *   Main entrypoint 
 *
 *   Usage: mkchrtab [options] <source> <dest>
 *
 *   Reads the character mapping source file line by line, building the
 *   native->internal ("input") and internal->native ("output") 256-entry
 *   translation tables and the optional HTML entity list, then writes the
 *   signature, ID, LDESC, both tables, the optional EXTRA_SYSTEM_INFO
 *   block, the optional entity list, and an end-of-file marker to the
 *   destination file.  With -strict, codes 128-255 that were never
 *   explicitly mapped are listed after the file has been written.
 *
 *   Per-line syntax errors are reported with printf and the line is
 *   skipped.  A missing argument, an unopenable file, or a missing ID or
 *   LDESC is reported and followed by os_term(OSEXFAIL).  On normal
 *   completion the function calls os_term(OSEXSUCC) and returns OSEXSUCC.
 */
int main(int argc, char **argv)
{
    int curarg;
    unsigned char input_map[256];
    unsigned char output_map[256];
    unsigned char input_map_set[256];
    unsigned char output_map_set[256];
    unsigned char *p;
    int i;
    osfildef *fp;
    char *infile;
    char *outfile;
    int linenum;
    static char sig[] = CMAP_SIG_S100;
    int strict_mode = FALSE;
    char id[5];
    char ldesc[CMAP_LDESC_MAX_LEN + 1];
    size_t len;
    unsigned char lenbuf[2];
    char *sys_info;
    entity_map_t *entity_first;
    entity_map_t *entity_last;

    /* no parameters have been specified yet */
    memset(id, 0, sizeof(id));
    ldesc[0] = '\0';
    sys_info = 0;

    /* we have no entities in our entity mapping list yet */
    entity_first = entity_last = 0;

    /* scan options */
    for (curarg = 1 ; curarg < argc && argv[curarg][0] == '-' ; ++curarg)
    {
        if (!stricmp(argv[curarg], "-strict"))
        {
            /* they want extra warnings */
            strict_mode = TRUE;
        }
        else
        {
            /* consume all remaining options so we get a usage message */
            curarg = argc;
            break;
        }
    }

    /* check for required arguments */
    if (curarg + 1 >= argc)
    {
        printf("usage: mkchrtab [options] <source> <dest>\n"
               "  <source> is the input file\n"
               "  <dest> is the output file\n"
               "Options:\n"
               "  -strict   warn if any codes 128-255 are unassigned\n");
#if 0
/* 
 *   The information about what goes in the file made the message way too
 *   long, so this has been removed.  Users will want to consult the
 *   documentation instead of the usage message for information this
 *   detailed, so it didn't seem useful to keep it in here.  
 */
        printf("\n"
               "The source file contains one entry per line, as follows:\n"
               "\n"
               "Set the internal character set identifier, which can be up "
               "to four letters long\n"
               "(note that the mapping file MUST contain an ID entry):\n"
               "   ID = id\n"
               "\n");
        printf("Set the internal character set's full display name:\n"
               "   LDESC = full name of character set\n"
               "\n"
               "Set system-dependent extra information (the meaning varies "
               "by system):\n"
               "   EXTRA_SYSTEM_INFO = info-string\n"
               "\n"
               "Set the native default character:\n"
               "   NATIVE_DEFAULT = charval\n"
               "Set the internal default character:\n"
               "   INTERNAL_DEFAULT = charval\n");
        printf("Load Unicode mapping files:\n"
               "   UNICODE NATIVE=native-mapping INTERNAL=internal-mapping\n"
               "\n"
               "Reversibly map a native character code to an internal code:\n"
               "   native <-> internal\n"
               "\n"
               "Map a native code to an internal code, and map the internal "
               "code back\nto a different native code:\n"
               "   native -> internal -> native\n"
               "\n"
               "Map a native code to an internal code, where the internal "
               "code is already\nmapped to a native code by a previous line:\n"
               "   native -> internal\n"
               "\n");
        printf("Map an internal code to a native code, where the native "
               "code is already\nmapped to an internal code by a previous "
               "line:\n"
               "   native <- internal\n"
               "\n"
               "Map an HTML entity name to a native code or string:\n"
               "   &entity = internal-code [internal-code ...]\n"
               "\n"
               "Numbers can be specified in decimal (default), octal (by "
               "prefixing the number\nwith a zero, as in '037'), or hex (by "
               "prefixing the number with '0x', as in\n'0xb2').  A number "
               "can also be entered as a character by enclosing the\n"
               "character in single quotes.\n"
               "\n"
               "Blank lines and lines starting with a pound sign ('#') are "
               "ignored.\n");
#endif /* 0 */
        os_term(OSEXFAIL);
    }

    /* get the input and output filenames */
    infile = argv[curarg];
    outfile = argv[curarg + 1];

    /* 
     *   initialize the tables - by default, a character code in one
     *   character set maps to the same code in the other character set 
     */
    for (p = input_map, i = 0 ; i < sizeof(input_map)/sizeof(input_map[0]) ;
         ++i, ++p)
        *p = (unsigned char)i;

    for (p = output_map, i = 0 ;
         i < sizeof(output_map)/sizeof(output_map[0]) ; ++i, ++p)
        *p = (unsigned char)i;

    /* 
     *   initialize the "set" flags all to false, since we haven't set any
     *   of the values yet -- we'll use these flags to detect when the
     *   user attempts to set the same value more than once, so that we
     *   can issue a warning (multiple mappings are almost certainly in
     *   error) 
     */
    for (i = 0 ; i < sizeof(input_map_set)/sizeof(input_map_set[0]) ; ++i)
        input_map_set[i] = output_map_set[i] = FALSE;

    /* open the input file */
    fp = osfoprs(infile, OSFTTEXT);
    if (fp == 0)
    {
        printf("error: unable to open input file \"%s\"\n", infile);
        os_term(OSEXFAIL);
    }

    /* parse the input file */
    for (linenum = 1 ; ; ++linenum)
    {
        char buf[256];
        char *p;
        unsigned int n1, n2, n3;
        int set_input;
        int set_output;

        /* presume we're going to set both values */
        set_input = set_output = TRUE;

        /* read the next line */
        if (osfgets(buf, sizeof(buf), fp) == 0)
            break;

        /* 
         *   scan off leading spaces (the <ctype.h> classifiers require a
         *   value representable as unsigned char, so cast before calling) 
         */
        for (p = buf ; isspace((unsigned char)*p) ; ++p) ;

        /* if this line is blank, or starts with a '#', ignore it */
        if (*p == '\0' || *p == '\n' || *p == '\r' || *p == '#')
            continue;

        /* check for special directives */
        if (isalpha((unsigned char)*p) || *p == '_')
        {
            char *sp;
            char *val;
            size_t vallen;
            size_t idlen;
            
            /* find the end of the directive name */
            for (sp = p ; isalpha((unsigned char)*sp) || *sp == '_' ; ++sp) ;
            idlen = sp - p;

            /* find the equals sign, if present */
            for (val = sp ; isspace((unsigned char)*val) ; ++val) ;
            if (*val == '=')
            {
                /* skip the '=' and any spaces that follow */
                for (++val ; isspace((unsigned char)*val) ; ++val) ;

                /* find the end of the value */
                for (sp = val ; *sp != '\n' && *sp != '\r' && *sp != '\0' ;
                     ++sp) ;

                /* note its length */
                vallen = sp - val;
            }
            else
            {
                /* there's no value */
                val = 0;
            }

            /* see what we have */
            if (id_matches(p, idlen, "id"))
            {
                /* this directive requires a value */
                if (val == 0)
                    goto val_required;

                /* ID's can never be more than four characters long */
                if (vallen > 4)
                {
                    printf("%s: line %d: ID too long - no more than four "
                           "characters are allowed\n", infile, linenum);
                }
                else
                {
                    /* remember the ID */
                    memcpy(id, val, vallen);
                    id[vallen] = '\0';
                }
            }
            else if (id_matches(p, idlen, "ldesc"))
            {
                /* this directive requires a value */
                if (val == 0)
                    goto val_required;

                /* make sure it fits */
                if (vallen > sizeof(ldesc) - 1)
                {
                    /* %u expects unsigned int, not size_t - convert */
                    printf("%s: line %d: LDESC too long - no more than %u "
                           "characters are allowed\n", infile, linenum,
                           (unsigned int)(sizeof(ldesc) - 1));
                }
                else
                {
                    /* remember the ldesc */
                    memcpy(ldesc, val, vallen);
                    ldesc[vallen] = '\0';
                }
            }
            else if (id_matches(p, idlen, "extra_system_info"))
            {
                char *new_info;

                /* this directive requires a value */
                if (val == 0)
                    goto val_required;

                /* allocate space for it */
                new_info = (char *)malloc(vallen + 1);
                if (new_info == 0)
                {
                    printf("%s: line %d: out of memory storing "
                           "EXTRA_SYSTEM_INFO\n", infile, linenum);
                    continue;
                }

                /* copy the value */
                memcpy(new_info, val, vallen);
                new_info[vallen] = '\0';

                /* 
                 *   if the directive was given more than once, the last
                 *   one wins - release the earlier copy so it isn't leaked
                 *   (free() accepts a null pointer) 
                 */
                free(sys_info);
                sys_info = new_info;
            }
            else if (id_matches(p, idlen, "native_default"))
            {
                unsigned int nval;
                int i;

                /* this directive requires a value */
                if (val == 0)
                    goto val_required;

                /* parse the character value */
                if (read_number(&nval, &val, infile, linenum, TRUE))
                    continue;

                /* apply the default */
                for (i = 128 ; i < 256 ; ++i)
                {
                    /* set the default only if we haven't mapped this one */
                    if (!output_map_set[i])
                        output_map[i] = nval;
                }
            }
            else if (id_matches(p, idlen, "internal_default"))
            {
                unsigned int nval;
                int i;

                /* this directive requires a value */
                if (val == 0)
                    goto val_required;

                /* parse the character value */
                if (read_number(&nval, &val, infile, linenum, TRUE))
                    continue;

                /* apply the default */
                for (i = 128 ; i < 256 ; ++i)
                {
                    /* apply the default only if we haven't set this one */
                    if (!input_map_set[i])
                        input_map[i] = nval;
                }
            }
            else if (id_matches(p, idlen, "unicode"))
            {
                /* skip the 'unicode' string and any intervening spaces */
                for (p += idlen ; isspace((unsigned char)*p) ; ++p) ;

                /* parse the unicode files */
                parse_unicode_files(p, strlen(p), infile, linenum,
                                    input_map, input_map_set,
                                    output_map, output_map_set,
                                    &entity_first, &entity_last);
            }
            else
            {
                /* unknown directive ("%.*s" takes an int precision) */
                printf("%s: line %d: invalid directive '%.*s'\n",
                       infile, linenum, (int)idlen, p);
            }

            /* done processing this line */
            continue;

            /* come here if the directive needs a value and there isn't one */
        val_required:
            printf("%s: line %d: '=' required with directive '%.*s'\n",
                   infile, linenum, (int)idlen, p);
            continue;
        }

        /* check for an entity name */
        if (*p == '&')
        {
            entity_map_t *mapp;

            /* skip the '&' */
            ++p;

            /* 
             *   parse the entity - if it succeeds, link the resulting
             *   mapping entry into our list 
             */
            mapp = parse_entity(p, infile, linenum);
            if (mapp != 0)
            {
                if (entity_last == 0)
                    entity_first = mapp;
                else
                    entity_last->nxt = mapp;
                entity_last = mapp;
            }

            /* done */
            continue;
        }

        /* read the first number */
        if (read_number(&n1, &p, infile, linenum, TRUE))
            continue;

        /* determine which operator we have */
        if (*p == '<')
        {
            /* make sure it's "<->" or "<-" */
            if (*(p+1) == '-' && *(p+2) != '>')
            {
                /* skip the operator */
                p += 2;

                /* 
                 *   This is a "from" translation - it only affects the
                 *   output mapping from the internal character set to the
                 *   native character set.  Read the second number.  There
                 *   is no third number, since we don't want to change the
                 *   input mapping.
                 */
                if (read_number(&n2, &p, infile, linenum, TRUE))
                    continue;

                /* 
                 *   The forward translation is not affected; set only the
                 *   output translation.  Note that the first number was
                 *   the output (native) value for the internal index in
                 *   the second number, so move the first value to n3.  
                 */
                n3 = n1;
                set_input = FALSE;
            }
            else if (*(p+1) == '-' && *(p+2) == '>')
            {
                /* skip it */
                p += 3;

                /* 
                 *   this is a reversible translation, so we only need one
                 *   more number - the third number is implicitly the same
                 *   as the first 
                 */
                n3 = n1;
                if (read_number(&n2, &p, infile, linenum, TRUE))
                    continue;
            }
            else
            {
                printf("%s: line %d: invalid operator - expected <->\n",
                       infile, linenum);
                continue;
            }
        }
        else if (*p == '-')
        {
            /* make sure it's "->" */
            if (*(p+1) != '>')
            {
                printf("%s: line %d: invalid operator - expected ->\n",
                       infile, linenum);
                continue;
            }

            /* skip it */
            p += 2;

            /* get the next number */
            if (read_number(&n2, &p, infile, linenum, TRUE))
                continue;

            /* 
             *   we may or may not have a third number - if we have
             *   another -> operator, read the third number; if we don't,
             *   the reverse translation is not affected by this entry 
             */
            if (*p == '-')
            {
                /* make sure it's "->" */
                if (*(p+1) != '>')
                {
                    printf("%s: line %d: invalid operator - expected ->\n",
                           infile, linenum);
                    continue;
                }

                /* skip it */
                p += 2;

                /* read the third number */
                if (read_number(&n3, &p, infile, linenum, TRUE))
                    continue;
            }
            else
            {
                /*
                 *   There's no third number - the reverse translation is
                 *   not affected by this line.  
                 */
                set_output = FALSE;
            }
        }
        else
        {
            printf("%s: line %d: invalid operator - expected "
                   "-> or <-> or <-\n",
                   infile, linenum);
            continue;
        }

        /* make sure we're at the end of the line, and warn if not */
        if (*p != '\0' && *p != '\n' && *p != '\r' && *p != '#')
            printf("%s: line %d: extra characters at end of line ignored\n",
                   infile, linenum);

        /* set the input mapping, if necessary */
        if (set_input)
        {
            /* warn the user if this value has already been set before */
            if (input_map_set[n1])
                printf("%s: line %d: warning - native character %u has "
                       "already been\n    mapped to internal value %u\n",
                       infile, linenum, n1, input_map[n1]);
            
            /* set it */
            input_map[n1] = n2;

            /* note that it's been set */
            input_map_set[n1] = TRUE;
        }

        /* set the output mapping, if necessary */
        if (set_output)
        {
            /* 
             *   warn the user if this value has already been set before;
             *   the existing native value lives in the output table 
             */
            if (output_map_set[n2])
                printf("%s: line %d: warning - internal character %u has "
                       "already been\n    mapped to native value %u\n",
                       infile, linenum, n2, output_map[n2]);

            /* set it */
            output_map[n2] = n3;

            /* note that it's been set */
            output_map_set[n2] = TRUE;
        }
    }

    /* we're done with the input file */
    osfcls(fp);

    /*
     *   It's an error if we didn't get an ID or LDESC 
     */
    if (id[0] == '\0')
    {
        printf("Error: No ID was specified.  An ID is required.\n");
        os_term(OSEXFAIL);
    }
    else if (ldesc[0] == '\0')
    {
        printf("Error: No LDESC was specified.  An LDESC is required.\n");
        os_term(OSEXFAIL);
    }

    /* open the output file */
    fp = osfopwb(outfile, OSFTCMAP);
    if (fp == 0)
    {
        printf("error: unable to open output file \"%s\"\n", outfile);
        os_term(OSEXFAIL);
    }

    /* write our signature */
    if (osfwb(fp, sig, sizeof(sig)))
        printf("error writing signature to output file\n");

    /* write the ID and LDESC */
    len = strlen(ldesc) + 1;
    oswp2(lenbuf, len);
    if (osfwb(fp, id, 4)
        || osfwb(fp, lenbuf, 2)
        || osfwb(fp, ldesc, len))
        printf("error writing ID information to output file\n");

    /* write the mapping tables */
    if (osfwb(fp, input_map, sizeof(input_map))
        || osfwb(fp, output_map, sizeof(output_map)))
        printf("error writing character maps to output file\n");

    /* write the extra system information if present */
    if (sys_info != 0)
    {
        /* write it out, with the "SYSI" flag so we know it's there */
        len = strlen(sys_info) + 1;
        oswp2(lenbuf, len);
        if (osfwb(fp, "SYSI", 4)
            || osfwb(fp, lenbuf, 2)
            || osfwb(fp, sys_info, len))
            printf("error writing EXTRA_SYSTEM_INFO to output file\n");

        /* we're done with the allocated buffer now */
        free(sys_info);
    }

    /*
     *   Write the entity mapping list, if we have any entities 
     */
    if (entity_first != 0)
    {
        entity_map_t *entp;
        entity_map_t *next_entity;
        char lenbuf[2];
        char cvalbuf[2];

        /* write out the entity list header */
        if (osfwb(fp, "ENTY", 4))
            printf("error writing entity marker to output file\n");

        /* run through the list, writing out each entry */
        for (entp = entity_first ; entp != 0 ; entp = next_entity)
        {
            /* write out this entity */
            oswp2(lenbuf, entp->exp_len);
            oswp2(cvalbuf, entp->html_char);
            if (osfwb(fp, lenbuf, 2)
                || osfwb(fp, cvalbuf, 2)
                || osfwb(fp, entp->expansion, entp->exp_len))
            {
                printf("error writing entity mapping to output file\n");
                break;
            }

            /* remember the next entity before we delete this one */
            next_entity = entp->nxt;

            /* we're done with this entity, so we can delete it now */
            free(entp);
        }

        /* 
         *   write out the end marker, which is just a length marker and
         *   character marker of zero 
         */
        oswp2(lenbuf, 0);
        oswp2(cvalbuf, 0);
        if (osfwb(fp, lenbuf, 2)
            || osfwb(fp, cvalbuf, 2))
            printf("error writing entity list end marker to output file\n");
    }

    /* write the end-of-file marker */
    if (osfwb(fp, "$EOF", 4))
        printf("error writing end-of-file marker to output file\n");

    /* done with the output file */
    osfcls(fp);

    /* if we're in strict mode, check for unassigned mappings */
    if (strict_mode)
    {
        int in_cnt, out_cnt;
        int cnt;

        /* count unassigned characters */
        for (i = 128, in_cnt = out_cnt = 0 ; i < 256 ; ++i)
        {
            if (!input_map_set[i])
                ++in_cnt;
            if (!output_map_set[i])
                ++out_cnt;
        }

        /* if we have any unassigned native characters, list them */
        if (in_cnt != 0)
        {
            printf("\nUnassigned native -> internal mappings:\n    ");
            for (i = 128, cnt = 0 ; i < 256 ; ++i)
            {
                if (!input_map_set[i])
                {
                    /* go to a new line if necessary */
                    if (cnt >= 16)
                    {
                        printf("\n    ");
                        cnt = 0;
                    }

                    /* display this item */
                    printf("%3d ", i);
                    ++cnt;
                }
            }
            printf("\n");
        }

        /* list unassigned internal characters */
        if (out_cnt != 0)
        {
            printf("\nUnassigned internal -> native mappings:\n    ");
            for (i = 128, cnt = 0 ; i < 256 ; ++i)
            {
                if (!output_map_set[i])
                {
                    /* go to a new line if necessary */
                    if (cnt >= 16)
                    {
                        printf("\n    ");
                        cnt = 0;
                    }

                    /* display this item */
                    printf("%3d ", i);
                    ++cnt;
                }
            }
            printf("\n");
        }
    }

    /* success */
    os_term(OSEXSUCC);
    return OSEXSUCC;
}
Exemple #5
0
/*
 * Parse the text of one tag and update the box tree held in 's'.
 *
 * s:   parser state; s->box is the currently open box and s->root_box
 *      receives the first box created.
 * buf: NUL-terminated tag text.  Based on the checks below it is expected
 *      to start at the tag name, or at '/', '!' or '?'.
 *
 * Returns the parser state to continue in: XML_STATE_PRETAG for tags whose
 * content is collected verbatim (style/script when s->is_html is set,
 * programlisting when XML_DOCBOOK is set), XML_STATE_TEXT otherwise.
 * Malformed input is reported through xml_error() and parsing continues.
 */
static int parse_tag(XMLState *s, const char *buf)
{
    char tag[256], *q, eot;
    /* strlen() result: must not be a 'char', or long tags get truncated */
    int len;
    char attr_name[256];
    char value[2048];
    const char *p;
    CSSIdent css_tag;
    CSSBox *box, *box1;
    CSSAttribute *first_attr, **pattr, *attr;

    p = buf;
    
    /* ignore XML commands */
    if (p[0] == '!' || p[0] == '?')
        return XML_STATE_TEXT;

    /* end of tag check */
    eot = 0;
    if (*p == '/') {
        p++;
        eot = 1;
    }

    /* parse the tag name */
    get_str(&p, tag, sizeof(tag), "/");
    if (tag[0] == '\0') {
        /* special closing tag */
        if (eot) {
            css_tag = CSS_ID_NIL;
            goto end_of_tag;
        } else {
            xml_error(s, "invalid null tag");
            return XML_STATE_TEXT;
        }
    }
    if (s->ignore_case)
        css_strtolower(tag, sizeof(tag));
    css_tag = css_new_ident(tag);
    
    /* XXX: should test html_syntax, but need more patches */
    if (s->is_html && (css_tag == CSS_ID_style || 
                       css_tag == CSS_ID_script)) 
        goto pretag;
    if (eot)
        goto end_of_tag;

    /* parse attributes into a singly linked list (pattr tracks the tail) */
    first_attr = NULL;
    pattr = &first_attr;
    for (;;) {
        skip_spaces(&p);
        if (*p == '\0' || *p == '/')
            break;
        get_str(&p, attr_name, sizeof(attr_name), "=/");
        if (s->ignore_case)
            css_strtolower(attr_name, sizeof(attr_name));
        if (*p == '=') {
            int och, ch;
            p++;
            skip_spaces(&p);
            och = *p;
            /* in html, we can put non string values */
            if (och != '\'' && och != '\"') {
                if (!s->html_syntax)
                    xml_error(s, "string expected for attribute '%s'", attr_name);
                q = value;
                while (*p != '\0' && !strchr(" \t\n\r<>", *p)) {
                    ch = parse_entity(&p);
                    /* silently truncate values that do not fit */
                    if ((q - value) < (int)sizeof(value) - 1) 
                        *q++ = ch;
                }
                *q = '\0';
            } else {
                p++;
                q = value;
                while (*p != och && *p != '\0' && *p != '<') {
                    ch = parse_entity(&p);
                    /* silently truncate values that do not fit */
                    if ((q - value) < (int)sizeof(value) - 1) 
                        *q++ = ch;
                }
                *q = '\0';
                if (*p != och) {
                    xml_error(s, "malformed string in attribute '%s'", attr_name);
                } else {
                    p++;
                }
            }
        } else {
            /* attribute without a value */
            value[0] = '\0';
        }
        attr = box_new_attr(css_new_ident(attr_name), value);
        if (attr) {
            *pattr = attr;
            pattr = &attr->next;
        }
    }

    /* close some tags (correct HTML mistakes) */
    if (s->html_syntax) {
        CSSBox *box1;
        const HTMLClosedTags *ct;
        ct = html_closed_tags;
        for (;;) {
            if (!ct->tag)
                break;
            if (css_tag == ct->tag) {
                box1 = s->box;
                while (box1 != NULL &&
                       css_get_enum(css_ident_str(box1->tag), ct->tag_closed) >= 0) {
                    html_eval_tag(s, box1);
                    box1 = box1->parent;
                }
                if (box1) {
                    s->box = box1;
                }
                break;
            }
            ct++;
        }
    }
    
    /* create the new box and add it */
    box = css_new_box(css_tag, NULL);
    box->attrs = first_attr;
    if (!s->box) {
        s->root_box = box;
    } else {
        css_make_child_box(s->box);
        css_add_box(s->box, box);
    }
    s->box = box;
    
    if ((s->flags & XML_DOCBOOK) && 
        css_tag == CSS_ID_programlisting) {
    pretag:
        /* remember the tag name so its end tag can be recognized later */
        pstrcpy(s->pretag, sizeof(s->pretag), tag);
        s->pretaglen = strlen(s->pretag);
        return XML_STATE_PRETAG;
    }

    len = (int)strlen(buf);
    /* end of tag. If html, check also some common mistakes. FORM is
       considered as self closing to avoid any content problems */
    if ((len > 0 && buf[len - 1] == '/') ||
        (s->html_syntax && (css_tag == CSS_ID_br ||
                            css_tag == CSS_ID_hr ||
                            css_tag == CSS_ID_meta ||
                            css_tag == CSS_ID_link ||
                            css_tag == CSS_ID_form ||
                            css_tag == CSS_ID_base ||
                            css_tag == CSS_ID_input ||
                            css_tag == CSS_ID_basefont ||
                            css_tag == CSS_ID_img))) {
    end_of_tag:
        box1 = s->box;
        if (box1) {
            if (s->html_syntax) {
                if (css_tag != CSS_ID_NIL) {
                    /* close all non matching tags */
                    while (box1 != NULL && box1->tag != css_tag) {
                        html_eval_tag(s, box1);
                        box1 = box1->parent;
                    }
                }
                if (!box1) {
                    if (css_tag != CSS_ID_form)
                        xml_error(s, "unmatched closing tag </%s>", 
                                  css_ident_str(css_tag));
                } else {
                    html_eval_tag(s, box1);
                    s->box = box1->parent;
                }
            } else {
                if (css_tag != CSS_ID_NIL && box1->tag != css_tag) {
                    xml_error(s, "unmatched closing tag </%s> for <%s>",
                              css_ident_str(css_tag), css_ident_str(box1->tag));
                } else {
                    if (s->is_html)
                        html_eval_tag(s, box1);
                    s->box = box1->parent;
                }
            }
        }
    }
    return XML_STATE_TEXT;
}
Exemple #6
0
/*
 * Run the XML/HTML parser state machine over one chunk of input.
 *
 * The input source is selected by buf_start:
 *  - buf_start != NULL: characters are decoded with charset_decode() from
 *    the memory range [buf_start, buf_start + buf_len).
 *  - buf_start == NULL: characters are read with eb_nextc() from edit
 *    buffer 'b', from offset_start up to offset_start + buf_len.
 *
 * The current state (s->state), the accumulation buffer (s->str) and the
 * other parser fields live in 's'; they are read and updated here but not
 * initialized by this function.
 *
 * Returns -1 when s->abort_func() reports an abort (it is polled on every
 * '\n'), otherwise 'buf - buf_start', i.e. how far 'buf' advanced.
 */
static int xml_parse_internal(XMLState *s, const char *buf_start, int buf_len,
                              EditBuffer *b, int offset_start)
{
    int ch, offset, offset0, text_offset_start, ret, offset_end;
    const char *buf_end, *buf;

    buf = buf_start;
    buf_end = buf + buf_len;
    offset = offset_start;
    offset_end = offset_start + buf_len;
    offset0 = 0; /* not used */
    text_offset_start = 0; /* not used */
    for (;;) {
        /* fetch the next character from whichever source is active */
        if (buf) {
            if (buf >= buf_end)
                break;
            ch = charset_decode(&s->charset_state, &buf);
        } else {
            if (offset >= offset_end)
                break;
            /* remember where this character starts; 'offset' is moved
               past it by eb_nextc() */
            offset0 = offset;
            ch = eb_nextc(b, offset, &offset);
        }
        /* increment line number to signal errors */
        if (ch == '\n') {
            /* well, should add counter, but we test abort here */
            if (s->abort_func(s->abort_opaque))
                return -1;
            s->line_num++;
        }

        switch (s->state) {
        case XML_STATE_TAG:
            /* inside '<' ... '>': collect the tag text in s->str */
            if (ch == '>') {
                /* tag complete: terminate it and let parse_tag() decide
                   which state comes next */
                strbuf_addch(&s->str, '\0');
                ret = parse_tag(s, (char *)s->str.buf);
                switch (ret) {
                default:
                case XML_STATE_TEXT:
                /* also reached by goto from XML_STATE_COMMENT2 and
                   XML_STATE_WAIT_EOT */
                xml_text:
                    strbuf_reset(&s->str);
                    s->state = XML_STATE_TEXT;
                    /* only meaningful in edit-buffer mode: 'offset' is not
                       advanced when reading from memory */
                    text_offset_start = offset;
                    break;
                case XML_STATE_PRETAG:
                    strbuf_reset(&s->str);
                    s->state = XML_STATE_PRETAG;
                    text_offset_start = offset;
                    break;
                }
            } else {
                strbuf_addch(&s->str, ch);
                /* test comment */
                /* the tag text so far is exactly "!--": switch to comment
                   scanning */
                if (s->str.size == 3 &&
                    s->str.buf[0] == '!' &&
                    s->str.buf[1] == '-' &&
                    s->str.buf[2] == '-') {
                    s->state = XML_STATE_COMMENT;
                }
            }
            break;
        case XML_STATE_TEXT:
            if (ch == '<') {
                /* XXX: not strictly correct with comments : should
                   not flush if comment */
                if (buf) {
                    /* memory mode: the text was accumulated in s->str */
                    strbuf_addch(&s->str, '\0');
                    flush_text(s, (char *)s->str.buf);
                    strbuf_reset(&s->str);
                } else {
                    /* edit-buffer mode: text is passed as an offset range
                       ending at the start of this '<' */
                    flush_text_buffer(s, text_offset_start, offset0);
                }
                s->state = XML_STATE_TAG;
            } else {
                /* text is only accumulated in memory mode; in edit-buffer
                   mode nothing is stored here */
                if (buf) {
                    /* evaluate entities */
                    if (ch == '&') {
                        /* NOTE(review): presumably steps back onto the '&'
                           (assumes charset_decode() consumed one byte for
                           it) so parse_entity() sees the whole entity and
                           advances 'buf' past it -- confirm */
                        buf--;
                        ch = parse_entity(&buf);
                    }
                    strbuf_addch(&s->str, ch);
                }
            }
            break;
        /* COMMENT / COMMENT1 / COMMENT2: scan for the closing "-->" */
        case XML_STATE_COMMENT:
            /* no '-' seen yet */
            if (ch == '-')
                s->state = XML_STATE_COMMENT1;
            break;
        case XML_STATE_COMMENT1:
            /* one '-' seen */
            if (ch == '-')
                s->state = XML_STATE_COMMENT2;
            else
                s->state = XML_STATE_COMMENT;
            break;
        case XML_STATE_COMMENT2:
            /* "--" seen: '>' ends the comment, another '-' keeps us here,
               anything else restarts the scan */
            if (ch == '>') {
                goto xml_text;
            } else if (ch != '-') {
                s->state = XML_STATE_COMMENT;
            }
            break;
        case XML_STATE_PRETAG:
            /* raw content of the element named s->pretag: accumulate
               everything until "</" followed by that tag name shows up at
               the end of s->str */
            {
                int len, taglen;

                strbuf_addch(&s->str, ch);
                /* length of "</" plus the tag name */
                taglen = s->pretaglen + 2;
                /* index in s->str where a closing "</tag" would start */
                len = s->str.size - taglen;
                if (len >= 0 && 
                    s->str.buf[len] == '<' && 
                    s->str.buf[len + 1] == '/' &&
                    !xml_tagcmp((char *)s->str.buf + len + 2, s->pretag)) {
                    /* cut the closing tag off so s->str holds only the
                       element content */
                    s->str.buf[len] = '\0';
                    
                    if (!xml_tagcmp(s->pretag, "style")) {
                        /* feed the content to the CSS parser when a style
                           sheet is attached */
                        if (s->style_sheet) {
                            /* note: this local 'b' shadows the EditBuffer
                               parameter 'b' inside this block */
                            CSSParseState b1, *b = &b1;
                            b->ptr = (char *)s->str.buf;
                            b->line_num = s->line_num; /* XXX: incorrect */
                            b->filename = s->filename;
                            b->ignore_case = s->ignore_case;
                            css_parse_style_sheet(s->style_sheet, b);
                        }
                    } else if (!xml_tagcmp(s->pretag, "script")) {
                        /* XXX: handle script */
                    } else {
                        /* just add the content */
                        if (buf) {
                            flush_text(s, (char *)s->str.buf);
                        } else {
                            /* XXX: would be incorrect if non ascii chars */
                            flush_text_buffer(s, text_offset_start, offset - taglen);
                        }
                        strbuf_reset(&s->str);
                        /* leave the current box: go back to its parent */
                        if (s->box)
                            s->box = s->box->parent;
                    }
                    /* the '>' of the closing tag has not been read yet;
                       s->str is reset at xml_text once it is */
                    s->state = XML_STATE_WAIT_EOT;
                }
            }
            break;
        case XML_STATE_WAIT_EOT:
            /* wait end of tag */
            if (ch == '>')
                    goto xml_text;
            break;
        }
    }
    return buf - buf_start;
}
Exemple #7
0
	virtual bool parse(const char * ptr) {
		pfc::string8 name, value;
		m_loopstart = 0;
		m_looplength = 0;
		m_by_meta = true;
		bool loopstart_found = false;
		bool looplength_found = false;
		bool loopend_found = false;
		t_uint64 m_loopend = 0;

		// [0-9]+ [0-9]+
		if (pfc::string_find_first(ptr, '=') == ~0)
		{
			char tmp;
			t_size n = 0;
			while (tmp = *ptr, tmp && !pfc::char_is_ascii_alphanumeric(tmp)) ++ptr;
			while (tmp = ptr[n], pfc::char_is_ascii_alphanumeric(tmp)) n++;
			if (!tmp) return true;
			m_loopstart = pfc::atoui64_ex(ptr, n);
			ptr += n;

			while (tmp = *ptr, tmp && !pfc::char_is_ascii_alphanumeric(tmp)) ++ptr;
			if (!tmp) return true;
			n = 0;
			while (tmp = ptr[n], pfc::char_is_ascii_alphanumeric(tmp)) n++;
			m_looplength = pfc::atoui64_ex(ptr, n);

			if (m_looplength) m_by_meta = false;
			return true;
		}

		while (parse_entity(ptr, name, value)) {
			for (;;)
			{
				if (!loopstart_found && (!pfc::stringCompareCaseInsensitive(name, "LOOPSTART") || !pfc::stringCompareCaseInsensitive(name, "START")))
				{
					m_loopstart = pfc::atoui64_ex(value, ~0);
					loopstart_found = true;
					break;
				}
				if (!looplength_found && (!pfc::stringCompareCaseInsensitive(name, "LOOPLENGTH") || !pfc::stringCompareCaseInsensitive(name, "LENGTH")))
				{
					m_looplength = pfc::atoui64_ex(value, ~0);
					if (m_looplength) looplength_found = true;
					break;
				}
				if (!loopend_found && (!pfc::stringCompareCaseInsensitive(name, "LOOPEND") || !pfc::stringCompareCaseInsensitive(name, "END")))
				{
					m_loopend = pfc::atoui64_ex(value, ~0);
					if (m_loopend > m_loopstart) loopend_found = true;
					break;
				}
				break;
			}
			if (loopstart_found)
			{
				if (looplength_found)
				{
					m_by_meta = false;
					break;
				}
				if (loopend_found)
				{
					if (m_loopend > m_loopstart)
					{
						m_looplength = m_loopend - m_loopstart;
						m_by_meta = false;
						break;
					}
					else
					{
						loopend_found = false;
					}
				}
			}
		}
		return true;
	}