예제 #1
0
파일: lexer.c 프로젝트: sproates/ocarina
/*
 * Returns the next token by driving the lexer's state machine.
 *
 * The function dispatches on lex->state and keeps looping until one of the
 * states yields a token:
 *
 *   LEX_DEF      calls _lex_adv(), then classifies lex->current_char: it
 *                switches to the matching state, skips whitespace, or
 *                returns a control-character token directly.
 *   LEX_IN_INT   appends characters accepted by _is_int() to lex->tok_buf;
 *                the first other character ends the token (TOK_INT).
 *   LEX_IN_ID    appends characters accepted by _is_id(); the first other
 *                character ends the token, which is TOK_KEY when
 *                _is_keyword() accepts the buffer and TOK_ID otherwise.
 *   LEX_IN_STR   appends characters until _is_str_end() matches (TOK_STR);
 *                an escape character switches to LEX_IN_ESC.
 *   LEX_IN_ESC   replaces the current character with _make_escape() of it,
 *                appends it and goes back to LEX_IN_STR; a character that
 *                is not escapable is an error.
 *   LEX_ERR      yields a TOK_ERR token.
 *   LEX_DONE     (and any unknown state) yields a TOK_EOF token.
 *
 * A str_add() result of 0 is treated as failure and moves to LEX_ERR.
 *
 * State transitions are performed iteratively rather than by recursing once
 * per consumed character, so stack usage does not grow with the length of a
 * token or of a whitespace run.
 */
token * lex_next(lexer * lex) {
  for(;;) {
    switch(lex->state) {
      case LEX_DEF:
        _lex_adv(lex);
        if(EOF == lex->current_char) { lex = _lex_set_state(lex, LEX_DONE); }
        else if(_is_int(lex->current_char)) { lex = _lex_set_state(lex, LEX_IN_INT); }
        else if(_is_space(lex->current_char)) { /* skip: stay in LEX_DEF and advance again */ }
        /* the same predicate recognises the opening and the closing delimiter;
         * the extra advance steps past the opening one */
        else if(_is_str_end(lex->current_char)) { lex = _lex_adv(_lex_set_state(lex, LEX_IN_STR)); }
        else if(_is_id_start(lex->current_char)) { lex = _lex_set_state(lex, LEX_IN_ID); }
        else if(_is_ctrl_char(lex->current_char)) { return _token_ctrl_char(lex->current_char); }
        else { lex = _lex_set_state(lex, LEX_ERR); }
        continue;

      case LEX_IN_INT:
        if(!_is_int(lex->current_char)) { return _tok_new(lex, TOK_INT, LEX_DEF); }
        if(0 == str_add(lex->tok_buf, lex->current_char)) {
          lex = _lex_set_state(lex, LEX_ERR);
          continue;
        }
        lex = _lex_adv(lex);
        continue;

      case LEX_IN_ID:
        if(!_is_id(lex->current_char)) {
          if(_is_keyword(lex->tok_buf)) { return _tok_new(lex, TOK_KEY, LEX_DEF); }
          return _tok_new(lex, TOK_ID, LEX_DEF);
        }
        if(0 == str_add(lex->tok_buf, lex->current_char)) {
          lex = _lex_set_state(lex, LEX_ERR);
          continue;
        }
        lex = _lex_adv(lex);
        continue;

      case LEX_IN_STR:
        if(_is_str_end(lex->current_char)) { return _tok_new(lex, TOK_STR, LEX_DEF); }
        /* Unterminated string: without this guard the lexer would keep
         * appending and advancing at end of input and never produce a token
         * (assuming _lex_adv keeps yielding EOF there - TODO confirm). */
        if(EOF == lex->current_char) {
          lex = _lex_set_state(lex, LEX_ERR);
          continue;
        }
        if(_is_escape(lex->current_char)) {
          lex = _lex_adv(_lex_set_state(lex, LEX_IN_ESC));
          continue;
        }
        if(0 == str_add(lex->tok_buf, lex->current_char)) {
          lex = _lex_set_state(lex, LEX_ERR);
          continue;
        }
        lex = _lex_adv(lex);
        continue;

      case LEX_IN_ESC:
        if(!_is_escapable(lex->current_char)) {
          lex = _lex_set_state(lex, LEX_ERR);
          continue;
        }
        /* store the translated character, not the one that followed the escape */
        _lex_set_char(lex, _make_escape(lex->current_char));
        if(0 == str_add(lex->tok_buf, lex->current_char)) {
          lex = _lex_set_state(lex, LEX_ERR);
          continue;
        }
        lex = _lex_adv(_lex_set_state(lex, LEX_IN_STR));
        continue;

      case LEX_ERR:
        return _tok_new(lex, TOK_ERR, LEX_DEF);

      case LEX_DONE:
      default:
        return _tok_new(lex, TOK_EOF, LEX_DONE);
    }
  }
}
예제 #2
0
파일: ibusxml.c 프로젝트: definite/ibus
/*
 * Text handler with the GMarkupParser "text" callback signature.
 *
 * user_data is treated as the XMLNode currently being filled in.
 * Text for which _is_space() reports true is ignored.  Any other text is
 * copied (text_len bytes, via g_strndup) into the node's text field, unless
 * the node already has sub-nodes or text, in which case
 * G_MARKUP_ERROR_INVALID_CONTENT is reported through @error and the node is
 * left untouched.
 */
static void
_text_cb (GMarkupParseContext *context,
          const gchar         *text,
          gsize                text_len,
          gpointer             user_data,
          GError             **error)
{
    XMLNode *node = (XMLNode *)user_data;

    if (!_is_space (text, text_len)) {
        if (!node->sub_nodes && !node->text) {
            /* first piece of real content for this node: keep a copy */
            node->text = g_strndup (text, text_len);
        }
        else {
            /* text mixed with child nodes, or a second text chunk */
            g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_INVALID_CONTENT, " ");
        }
    }
}
예제 #3
0
/**
 * fm_xml_file_parse_data
 * @file: the parser container
 * @text: data to parse
 * @size: size of @text
 * @error: (allow-none) (out): location to save error
 * @user_data: data to pass to handlers
 *
 * Parses the next chunk of @text data. Parsing stops at the end of the data
 * or at the first error; in the latter case @error is set appropriately.
 *
 * See also: fm_xml_file_finish_parse().
 *
 * Returns: %FALSE if parsing failed.
 *
 * Since: 1.2.0
 */
gboolean fm_xml_file_parse_data(FmXmlFile *file, const char *text,
                                gsize size, GError **error, gpointer user_data)
{
    gsize ptr, len;
    char *dst, *end, *tag, *name, *value;
    GString *buff;
    FmXmlFileItem *item;
    gboolean closing, selfdo;
    FmXmlFileTag i;
    char **attrib_names, **attrib_values;
    guint attribs;
    char quote;

    g_return_val_if_fail(file != NULL && FM_IS_XML_FILE(file), FALSE);
_restart:
    if (size == 0)
        return TRUE;
    /* if file->data has '<' as first char then we stopped at tag */
    if (file->data && file->data->len && file->data->str[0] == '<')
    {
        for (ptr = 0; ptr < size; ptr++)
            if (text[ptr] == '>')
                break;
        if (ptr == size) /* still no end of that tag */
        {
            g_string_append_len(file->data, text, size);
            return TRUE;
        }
        /* we got a complete tag, nice, let parse it */
        g_string_append_len(file->data, text, ptr);
        ptr++;
        text += ptr;
        size -= ptr;
        /* check for CDATA first */
        if (file->data->len >= 11 /* <![CDATA[]] */ &&
            strncmp(file->data->str, "<![CDATA[", 9) == 0)
        {
            end = file->data->str + file->data->len;
            if (end[-2] != ']' || end[-1] != ']') /* find end of CDATA */
            {
                g_string_append_c(file->data, '>');
                goto _restart;
            }
            if (file->current_item == NULL) /* CDATA at top level! */
                g_warning("FmXmlFile: line %u: junk CDATA in XML file ignored",
                          file->line);
            else
            {
                item = fm_xml_file_item_new(FM_XML_FILE_TEXT);
                item->text = item->comment = g_strndup(&file->data->str[9],
                                                       file->data->len - 11);
                fm_xml_file_item_append_child(file->current_item, item);
            }
            _update_file_ptr(file, 1);
            g_string_truncate(file->data, 0);
            goto _restart;
        }
        /* check for comment */
        if (file->data->len >= 7 /* <!-- -- */ &&
            strncmp(file->data->str, "<!--", 4) == 0)
        {
            end = file->data->str + file->data->len;
            if (end[-2] != '-' || end[-1] != '-') /* find end of comment */
            {
                g_string_append_c(file->data, '>');
                goto _restart;
            }
            g_free(file->comment_pre);
            /* FIXME: not ignore duplicate comments */
            if (_is_space(end[-3]))
                file->comment_pre = g_strndup(&file->data->str[5],
                                              file->data->len - 8);
            else /* FIXME: check: XML spec says it should be not '-' */
                file->comment_pre = g_strndup(&file->data->str[5],
                                              file->data->len - 7);
            _update_file_ptr(file, 1);
            g_string_truncate(file->data, 0);
            goto _restart;
        }
        /* check for DTD - it may be only at top level */
        if (file->current_item == NULL && file->data->len >= 10 &&
            strncmp(file->data->str, "<!DOCTYPE", 9) == 0 &&
            _is_space(file->data->str[9]))
        {
            /* FIXME: can DTD contain any tags? count '<' and '>' pairs */
            if (file->tags[0].name) /* duplicate DTD! */
                g_warning("FmXmlFile: line %u: duplicate DTD, ignored",
                          file->line);
            else
                file->tags[0].name = g_strndup(&file->data->str[10],
                                               file->data->len - 10);
            _update_file_ptr(file, 1);
            g_string_truncate(file->data, 0);
            goto _restart;
        }
        /* support directives such as <?xml ..... ?> */
        if (file->data->len >= 4 /* <?x? */ &&
            file->data->str[1] == '?' &&
            file->data->str[file->data->len-1] == '?')
        {
            item = fm_xml_file_item_new(FM_XML_FILE_TEXT);
            item->comment = g_strndup(&file->data->str[2], file->data->len - 3);
            if (file->current_item != NULL)
                fm_xml_file_item_append_child(file->current_item, item);
            else
            {
                item->file = file;
                item->parent_list = &file->items;
                file->items = g_list_append(file->items, item);
            }
            _update_file_ptr(file, 1);
            g_string_truncate(file->data, 0);
            goto _restart;
        }
        closing = (file->data->str[1] == '/');
        end = file->data->str + file->data->len;
        selfdo = (!closing && end[-1] == '/');
        if (selfdo)
            end--;
        tag = closing ? &file->data->str[2] : &file->data->str[1];
        for (dst = tag; dst < end; dst++)
            if (_is_space(*dst))
                break;
        _update_file_ptr_part(file, file->data->str, dst + 1);
        *dst = '\0'; /* terminate the tag */
        if (closing)
        {
            if (dst != end) /* we got a space char in closing tag */
            {
                g_set_error_literal(error, G_MARKUP_ERROR, G_MARKUP_ERROR_PARSE,
                                    _("Space isn't allowed in the close tag"));
                return FALSE;
            }
            /* g_debug("XML parser: found closing tag '%s' for %p at %d:%d", tag,
                    file->current_item, file->line, file->pos); */
            item = file->current_item;
            if (item == NULL) /* no tag to close */
            {
                g_set_error(error, G_MARKUP_ERROR, G_MARKUP_ERROR_PARSE,
                            _("Element '%s' was closed but no element was opened"),
                            tag);
                return FALSE;
            }
            else
            {
                char *tagname;

                if (item->tag == FM_XML_FILE_TAG_NOT_HANDLED)
                    tagname = item->tag_name;
                else
                    tagname = file->tags[item->tag].name;
                if (strcmp(tag, tagname)) /* closing tag doesn't match */
                {
                    /* FIXME: validate tag so be more verbose on error */
                    g_set_error(error, G_MARKUP_ERROR, G_MARKUP_ERROR_PARSE,
                                _("Element '%s' was closed but the currently "
                                  "open element is '%s'"), tag, tagname);
                    return FALSE;
                }
                file->current_item = item->parent;
_close_the_tag:
                /* g_debug("XML parser: close the tag '%s'", tag); */
                g_string_truncate(file->data, 0);
                if (item->tag != FM_XML_FILE_TAG_NOT_HANDLED)
                {
                    if (!file->tags[item->tag].handler(item, item->children,
                                                       item->attribute_names,
                                                       item->attribute_values,
                                                       item->attribute_names ? g_strv_length(item->attribute_names) : 0,
                                                       file->line,
                                                       file->pos,
                                                       error, user_data))
                        return FALSE;
                }
                file->pos++; /* '>' */
                goto _restart;
            }
        }
        else /* opening tag */
        {
            /* g_debug("XML parser: found opening tag '%s'", tag); */
            /* parse and check tag name */
            for (i = 1; i < file->n_tags; i++)
                if (strcmp(file->tags[i].name, tag) == 0)
                    break;
            if (i == file->n_tags)
                /* FIXME: do name validation */
                i = FM_XML_FILE_TAG_NOT_HANDLED;
            /* parse and check attributes */
            attribs = 0;
            attrib_names = attrib_values = NULL;
            while (dst < end)
            {
                name = &dst[1]; /* skip this space */
                while (name < end && _is_space(*name))
                    name++;
                value = name;
                while (value < end && !_is_space(*value) && *value != '=')
                    value++;
                len = value - name;
                _update_file_ptr_part(file, dst, value);
                /* FIXME: skip spaces before =? */
                if (value + 3 <= end && *value == '=') /* minimum is ="" */
                {
                    value++;
                    file->pos++; /* '=' */
                    /* FIXME: skip spaces after =? */
                    quote = *value++;
                    if (quote != '\'' && quote != '"')
                    {
                        g_set_error(error, G_MARKUP_ERROR, G_MARKUP_ERROR_PARSE,
                                    _("Invalid char '%c' at start of attribute value"),
                                    quote);
                        goto _attr_error;
                    }
                    file->pos++; /* quote char */
                    for (ptr = 0; &value[ptr] < end; ptr++)
                        if (value[ptr] == quote)
                            break;
                    if (&value[ptr] == end)
                    {
                        g_set_error(error, G_MARKUP_ERROR, G_MARKUP_ERROR_PARSE,
                                    _("Invalid char '%c' at end of attribute value,"
                                      " expected '%c'"), value[ptr-1], quote);
                        goto _attr_error;
                    }
                    buff = g_string_new_len(value, ptr);
                    if (!unescape_gstring_inplace(buff, &file->line,
                                                  &file->pos, TRUE, error))
                    {
                        g_string_free(buff, TRUE);
_attr_error:
                        for (i = 0; i < attribs; i++)
                        {
                            g_free(attrib_names[i]);
                            g_free(attrib_values[i]);
                        }
                        g_free(attrib_names);
                        g_free(attrib_values);
                        return FALSE;
                    }
                    dst = &value[ptr+1];
                    value = g_string_free(buff, FALSE);
                    file->pos++; /* end quote char */
                }
                else
                {
                    dst = value;
                    value = NULL;
                    /* FIXME: isn't it error? */
                }
                attrib_names = g_renew(char *, attrib_names, attribs + 2);
                attrib_values = g_renew(char *, attrib_values, attribs + 2);
                attrib_names[attribs] = g_strndup(name, len);
                attrib_values[attribs] = value;
                attribs++;
            }
            if (attribs > 0)
            {
                attrib_names[attribs] = NULL;
                attrib_values[attribs] = NULL;
            }
            /* create new item */
            item = fm_xml_file_item_new(i);
            item->attribute_names = attrib_names;
            item->attribute_values = attrib_values;
            if (i == FM_XML_FILE_TAG_NOT_HANDLED)
                item->tag_name = g_strdup(tag);
            /* insert new item into the container */
            item->comment = file->comment_pre;
            file->comment_pre = NULL;
            if (file->current_item)
                fm_xml_file_item_append_child(file->current_item, item);
            else
            {
                item->file = file;
                item->parent_list = &file->items;
                file->items = g_list_append(file->items, item);
            }
            file->pos++; /* '>' or '/' */
            if (selfdo) /* simple self-closing tag */
                goto _close_the_tag;
            file->current_item = item;
            g_string_truncate(file->data, 0);
            goto _restart;
        }
    }
    /* otherwise we stopped at some data somewhere */
    else
    {
        if (!file->data || file->data->len == 0) while (size > 0)