/**
 * lex_next - run the lexer state machine until one token is complete.
 * @lex: lexer providing the current state, the current character and the
 *       token buffer that accumulates the characters of the token.
 *
 * The machine is driven by lex->state:
 *   LEX_DEF     - advance one character and decide which token starts here;
 *                 whitespace is skipped, control characters become a token
 *                 immediately.
 *   LEX_IN_INT  - collect characters accepted by _is_int().
 *   LEX_IN_ID   - collect characters accepted by _is_id(); on completion the
 *                 buffer is classified as keyword or identifier.
 *   LEX_IN_STR  - collect characters up to the closing delimiter, switching
 *                 to LEX_IN_ESC when an escape introducer is met.
 *   LEX_IN_ESC  - translate one escapable character and return to LEX_IN_STR.
 *   LEX_ERR     - emit TOK_ERR and reset to LEX_DEF.
 *   LEX_DONE    - (and any unknown state) emit TOK_EOF and stay in LEX_DONE.
 *
 * State transitions are performed in a loop rather than by self-recursion:
 * C gives no guarantee of tail-call elimination, so a recursive formulation
 * consumes one stack frame per input character and can overflow the stack on
 * long tokens or long whitespace runs.
 *
 * Returns: the token produced by _tok_new() or _token_ctrl_char().
 *
 * NOTE(review): after an integer or identifier the terminating character is
 * still in lex->current_char when the token is emitted, and LEX_DEF advances
 * before inspecting anything. Presumably _tok_new() accounts for that --
 * confirm against its implementation.
 */
token * lex_next(lexer * lex) {
    for(;;) {
        switch(lex->state) {
        case LEX_DEF:
            _lex_adv(lex);
            if(EOF == lex->current_char) {
                lex = _lex_set_state(lex, LEX_DONE);
            } else if(_is_int(lex->current_char)) {
                lex = _lex_set_state(lex, LEX_IN_INT);
            } else if(_is_space(lex->current_char)) {
                /* whitespace: stay in LEX_DEF and look at the next char */
            } else if(_is_str_end(lex->current_char)) {
                /* opening delimiter: step past it before collecting */
                lex = _lex_adv(_lex_set_state(lex, LEX_IN_STR));
            } else if(_is_id_start(lex->current_char)) {
                lex = _lex_set_state(lex, LEX_IN_ID);
            } else if(_is_ctrl_char(lex->current_char)) {
                return _token_ctrl_char(lex->current_char);
            } else {
                lex = _lex_set_state(lex, LEX_ERR);
            }
            continue;

        case LEX_IN_INT:
            if(!_is_int(lex->current_char)) {
                return _tok_new(lex, TOK_INT, LEX_DEF);
            }
            if(0 == str_add(lex->tok_buf, lex->current_char)) {
                lex = _lex_set_state(lex, LEX_ERR);
                continue;
            }
            lex = _lex_adv(lex);
            continue;

        case LEX_IN_ID:
            if(!_is_id(lex->current_char)) {
                if(_is_keyword(lex->tok_buf)) {
                    return _tok_new(lex, TOK_KEY, LEX_DEF);
                }
                return _tok_new(lex, TOK_ID, LEX_DEF);
            }
            if(0 == str_add(lex->tok_buf, lex->current_char)) {
                lex = _lex_set_state(lex, LEX_ERR);
                continue;
            }
            lex = _lex_adv(lex);
            continue;

        case LEX_IN_STR:
            if(_is_str_end(lex->current_char)) {
                return _tok_new(lex, TOK_STR, LEX_DEF);
            }
            if(EOF == lex->current_char) {
                /*
                 * Unterminated string literal. Without this check EOF would
                 * be appended to the buffer over and over (assuming
                 * _lex_adv() keeps yielding EOF once input is exhausted)
                 * and the scan would never finish.
                 */
                lex = _lex_set_state(lex, LEX_ERR);
                continue;
            }
            if(_is_escape(lex->current_char)) {
                /* skip the escape introducer itself */
                lex = _lex_adv(_lex_set_state(lex, LEX_IN_ESC));
                continue;
            }
            if(0 == str_add(lex->tok_buf, lex->current_char)) {
                lex = _lex_set_state(lex, LEX_ERR);
                continue;
            }
            lex = _lex_adv(lex);
            continue;

        case LEX_IN_ESC:
            if(!_is_escapable(lex->current_char)) {
                lex = _lex_set_state(lex, LEX_ERR);
                continue;
            }
            /* replace the escaped character by the character it denotes */
            _lex_set_char(lex, _make_escape(lex->current_char));
            if(0 == str_add(lex->tok_buf, lex->current_char)) {
                lex = _lex_set_state(lex, LEX_ERR);
                continue;
            }
            lex = _lex_adv(_lex_set_state(lex, LEX_IN_STR));
            continue;

        case LEX_ERR:
            return _tok_new(lex, TOK_ERR, LEX_DEF);

        case LEX_DONE:
        default:
            return _tok_new(lex, TOK_EOF, LEX_DONE);
        }
    }
}
static void _text_cb (GMarkupParseContext *context, const gchar *text, gsize text_len, gpointer user_data, GError **error) { XMLNode *p = (XMLNode *)user_data; if (_is_space (text, text_len)) { return; } if (p->sub_nodes || p->text) { g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_INVALID_CONTENT, " "); return; } p->text = g_strndup (text, text_len); }
/** * fm_xml_file_parse_data * @file: the parser container * @text: data to parse * @size: size of @text * @error: (allow-none) (out): location to save error * @user_data: data to pass to handlers * * Parses next chunk of @text data. Parsing stops at end of data or at any * error. In latter case @error will be set appropriately. * * See also: fm_xml_file_finish_parse(). * * Returns: %FALSE if parsing failed. * * Since: 1.2.0 */ gboolean fm_xml_file_parse_data(FmXmlFile *file, const char *text, gsize size, GError **error, gpointer user_data) { gsize ptr, len; char *dst, *end, *tag, *name, *value; GString *buff; FmXmlFileItem *item; gboolean closing, selfdo; FmXmlFileTag i; char **attrib_names, **attrib_values; guint attribs; char quote; g_return_val_if_fail(file != NULL && FM_IS_XML_FILE(file), FALSE); _restart: if (size == 0) return TRUE; /* if file->data has '<' as first char then we stopped at tag */ if (file->data && file->data->len && file->data->str[0] == '<') { for (ptr = 0; ptr < size; ptr++) if (text[ptr] == '>') break; if (ptr == size) /* still no end of that tag */ { g_string_append_len(file->data, text, size); return TRUE; } /* we got a complete tag, nice, let parse it */ g_string_append_len(file->data, text, ptr); ptr++; text += ptr; size -= ptr; /* check for CDATA first */ if (file->data->len >= 11 /* <![CDATA[]] */ && strncmp(file->data->str, "<![CDATA[", 9) == 0) { end = file->data->str + file->data->len; if (end[-2] != ']' || end[-1] != ']') /* find end of CDATA */ { g_string_append_c(file->data, '>'); goto _restart; } if (file->current_item == NULL) /* CDATA at top level! 
*/ g_warning("FmXmlFile: line %u: junk CDATA in XML file ignored", file->line); else { item = fm_xml_file_item_new(FM_XML_FILE_TEXT); item->text = item->comment = g_strndup(&file->data->str[9], file->data->len - 11); fm_xml_file_item_append_child(file->current_item, item); } _update_file_ptr(file, 1); g_string_truncate(file->data, 0); goto _restart; } /* check for comment */ if (file->data->len >= 7 /* <!-- -- */ && strncmp(file->data->str, "<!--", 4) == 0) { end = file->data->str + file->data->len; if (end[-2] != '-' || end[-1] != '-') /* find end of comment */ { g_string_append_c(file->data, '>'); goto _restart; } g_free(file->comment_pre); /* FIXME: not ignore duplicate comments */ if (_is_space(end[-3])) file->comment_pre = g_strndup(&file->data->str[5], file->data->len - 8); else /* FIXME: check: XML spec says it should be not '-' */ file->comment_pre = g_strndup(&file->data->str[5], file->data->len - 7); _update_file_ptr(file, 1); g_string_truncate(file->data, 0); goto _restart; } /* check for DTD - it may be only at top level */ if (file->current_item == NULL && file->data->len >= 10 && strncmp(file->data->str, "<!DOCTYPE", 9) == 0 && _is_space(file->data->str[9])) { /* FIXME: can DTD contain any tags? count '<' and '>' pairs */ if (file->tags[0].name) /* duplicate DTD! */ g_warning("FmXmlFile: line %u: duplicate DTD, ignored", file->line); else file->tags[0].name = g_strndup(&file->data->str[10], file->data->len - 10); _update_file_ptr(file, 1); g_string_truncate(file->data, 0); goto _restart; } /* support directives such as <?xml ..... ?> */ if (file->data->len >= 4 /* <?x? */ && file->data->str[1] == '?' 
&& file->data->str[file->data->len-1] == '?') { item = fm_xml_file_item_new(FM_XML_FILE_TEXT); item->comment = g_strndup(&file->data->str[2], file->data->len - 3); if (file->current_item != NULL) fm_xml_file_item_append_child(file->current_item, item); else { item->file = file; item->parent_list = &file->items; file->items = g_list_append(file->items, item); } _update_file_ptr(file, 1); g_string_truncate(file->data, 0); goto _restart; } closing = (file->data->str[1] == '/'); end = file->data->str + file->data->len; selfdo = (!closing && end[-1] == '/'); if (selfdo) end--; tag = closing ? &file->data->str[2] : &file->data->str[1]; for (dst = tag; dst < end; dst++) if (_is_space(*dst)) break; _update_file_ptr_part(file, file->data->str, dst + 1); *dst = '\0'; /* terminate the tag */ if (closing) { if (dst != end) /* we got a space char in closing tag */ { g_set_error_literal(error, G_MARKUP_ERROR, G_MARKUP_ERROR_PARSE, _("Space isn't allowed in the close tag")); return FALSE; } /* g_debug("XML parser: found closing tag '%s' for %p at %d:%d", tag, file->current_item, file->line, file->pos); */ item = file->current_item; if (item == NULL) /* no tag to close */ { g_set_error(error, G_MARKUP_ERROR, G_MARKUP_ERROR_PARSE, _("Element '%s' was closed but no element was opened"), tag); return FALSE; } else { char *tagname; if (item->tag == FM_XML_FILE_TAG_NOT_HANDLED) tagname = item->tag_name; else tagname = file->tags[item->tag].name; if (strcmp(tag, tagname)) /* closing tag doesn't match */ { /* FIXME: validate tag so be more verbose on error */ g_set_error(error, G_MARKUP_ERROR, G_MARKUP_ERROR_PARSE, _("Element '%s' was closed but the currently " "open element is '%s'"), tag, tagname); return FALSE; } file->current_item = item->parent; _close_the_tag: /* g_debug("XML parser: close the tag '%s'", tag); */ g_string_truncate(file->data, 0); if (item->tag != FM_XML_FILE_TAG_NOT_HANDLED) { if (!file->tags[item->tag].handler(item, item->children, item->attribute_names, 
item->attribute_values, item->attribute_names ? g_strv_length(item->attribute_names) : 0, file->line, file->pos, error, user_data)) return FALSE; } file->pos++; /* '>' */ goto _restart; } } else /* opening tag */ { /* g_debug("XML parser: found opening tag '%s'", tag); */ /* parse and check tag name */ for (i = 1; i < file->n_tags; i++) if (strcmp(file->tags[i].name, tag) == 0) break; if (i == file->n_tags) /* FIXME: do name validation */ i = FM_XML_FILE_TAG_NOT_HANDLED; /* parse and check attributes */ attribs = 0; attrib_names = attrib_values = NULL; while (dst < end) { name = &dst[1]; /* skip this space */ while (name < end && _is_space(*name)) name++; value = name; while (value < end && !_is_space(*value) && *value != '=') value++; len = value - name; _update_file_ptr_part(file, dst, value); /* FIXME: skip spaces before =? */ if (value + 3 <= end && *value == '=') /* minimum is ="" */ { value++; file->pos++; /* '=' */ /* FIXME: skip spaces after =? */ quote = *value++; if (quote != '\'' && quote != '"') { g_set_error(error, G_MARKUP_ERROR, G_MARKUP_ERROR_PARSE, _("Invalid char '%c' at start of attribute value"), quote); goto _attr_error; } file->pos++; /* quote char */ for (ptr = 0; &value[ptr] < end; ptr++) if (value[ptr] == quote) break; if (&value[ptr] == end) { g_set_error(error, G_MARKUP_ERROR, G_MARKUP_ERROR_PARSE, _("Invalid char '%c' at end of attribute value," " expected '%c'"), value[ptr-1], quote); goto _attr_error; } buff = g_string_new_len(value, ptr); if (!unescape_gstring_inplace(buff, &file->line, &file->pos, TRUE, error)) { g_string_free(buff, TRUE); _attr_error: for (i = 0; i < attribs; i++) { g_free(attrib_names[i]); g_free(attrib_values[i]); } g_free(attrib_names); g_free(attrib_values); return FALSE; } dst = &value[ptr+1]; value = g_string_free(buff, FALSE); file->pos++; /* end quote char */ } else { dst = value; value = NULL; /* FIXME: isn't it error? 
*/ } attrib_names = g_renew(char *, attrib_names, attribs + 2); attrib_values = g_renew(char *, attrib_values, attribs + 2); attrib_names[attribs] = g_strndup(name, len); attrib_values[attribs] = value; attribs++; } if (attribs > 0) { attrib_names[attribs] = NULL; attrib_values[attribs] = NULL; } /* create new item */ item = fm_xml_file_item_new(i); item->attribute_names = attrib_names; item->attribute_values = attrib_values; if (i == FM_XML_FILE_TAG_NOT_HANDLED) item->tag_name = g_strdup(tag); /* insert new item into the container */ item->comment = file->comment_pre; file->comment_pre = NULL; if (file->current_item) fm_xml_file_item_append_child(file->current_item, item); else { item->file = file; item->parent_list = &file->items; file->items = g_list_append(file->items, item); } file->pos++; /* '>' or '/' */ if (selfdo) /* simple self-closing tag */ goto _close_the_tag; file->current_item = item; g_string_truncate(file->data, 0); goto _restart; } } /* otherwise we stopped at some data somewhere */ else { if (!file->data || file->data->len == 0) while (size > 0)