/* generic tag parsing (used for RSS and Atom) */ static void parse_tag (feedParserCtxtPtr ctxt, xmlNodePtr cur, gboolean isFeedTag) { int i, j; gchar *date, *value, *tmp; const gchar *mapping; gboolean isNotEmpty; if (!isFeedTag) { /* special handling for the ISO 8601 date item tags */ if (!xmlStrcmp (BAD_CAST "date", cur->name)) { if (NULL != (date = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1))) { i = date_parse_ISO8601 (date); ctxt->item->time = i; g_free (date); } return; } /* special handling for item titles */ if(!xmlStrcmp (BAD_CAST "title", cur->name)) { value = (gchar *)xmlNodeListGetString(cur->doc, cur->xmlChildrenNode, 1); if(value) { item_set_title(ctxt->item, value); g_free(value); } return; } } /* compare with each possible tag name */ for (i = 0; taglist[i] != NULL; i++) { if (!xmlStrcmp ((const xmlChar *)taglist[i], cur->name)) { value = (gchar *)xmlNodeListGetString(cur->doc, cur->xmlChildrenNode, 1); if (value) { /* check if value consist of whitespaces only */ for (j = 0, tmp = value, isNotEmpty = FALSE; j < g_utf8_strlen (value, -1); j++) { if (!g_unichar_isspace (*tmp)) { isNotEmpty = TRUE; break; } tmp = g_utf8_next_char (tmp); } if (isNotEmpty) { if (isFeedTag) { if (NULL != (mapping = mapToFeedMetadata[i])) ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, mapping, value); } else { if (NULL != (mapping = mapToItemMetadata[i])) ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, mapping, value); } } g_free (value); } return; } } }
static void atom10_parse_feed_updated (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state) { gchar *timestamp; timestamp = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1); if (timestamp) { ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "contentUpdateDate", timestamp); ctxt->feed->time = date_parse_ISO8601 (timestamp); g_free (timestamp); } }
static void atom10_parse_entry_published (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state) { gchar *datestr; datestr = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1); if (datestr) { ctxt->item->time = date_parse_ISO8601 (datestr); ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "pubDate", datestr); g_free (datestr); } }
static void atom10_parse_entry_updated (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state) { gchar *datestr; datestr = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1); /* if pubDate is already set, don't overwrite it */ if (datestr && !metadata_list_get(ctxt->item->metadata, "pubDate")) { ctxt->item->time = date_parse_ISO8601 (datestr); ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "contentUpdateDate", datestr); } g_free (datestr); }
static void parse_item_tag (feedParserCtxtPtr ctxt, xmlNodePtr cur) { gchar *date, *source, *sourceURL, *tmp; gboolean sourceTag = FALSE; if (!xmlStrcmp (BAD_CAST "source", cur->name)) { sourceTag = TRUE; g_hash_table_insert (ctxt->item->tmpdata, g_strdup ("ag:source"), (gchar *)xmlNodeListGetString(cur->doc, cur->xmlChildrenNode, 1)); } else if (!xmlStrcmp (BAD_CAST "sourceURL", cur->name)) { sourceTag = TRUE; g_hash_table_insert (ctxt->item->tmpdata, g_strdup ("ag:sourceURL"), (gchar *)xmlNodeListGetString(cur->doc, cur->xmlChildrenNode, 1)); } if (sourceTag) { source = g_hash_table_lookup (ctxt->item->tmpdata, "ag:source"); sourceURL = g_hash_table_lookup (ctxt->item->tmpdata, "ag:sourceURL"); if (source && sourceURL) tmp = g_strdup_printf ("<a href=\"%s\">%s</a>", sourceURL, source); else if (!source) tmp = g_strdup_printf ("<a href=\"%s\">%s</a>", sourceURL, sourceURL); else tmp = g_strdup (source); metadata_list_set (&(ctxt->item->metadata), "agSource", tmp); } else if (!xmlStrcmp (BAD_CAST "timestamp", cur->name)) { if (NULL != (tmp = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1))) { date = date_format (date_parse_ISO8601 (tmp), _("%b %d %H:%M")); metadata_list_set (&(ctxt->item->metadata), "agTimestamp", date); g_free (date); g_free (tmp); } } }
/* reads a PIE feed URL and returns a new channel structure (even if the feed could not be read) */ static void pie_parse(feedParserCtxtPtr ctxt, xmlNodePtr cur) { gchar *tmp2, *tmp = NULL, *tmp3; NsHandler *nsh; parseChannelTagFunc pf; while(TRUE) { if(xmlStrcmp(cur->name, BAD_CAST"feed")) { g_string_append(ctxt->feed->parseErrors, "<p>Could not find Atom/Echo/PIE header!</p>"); break; } /* parse feed contents */ cur = cur->xmlChildrenNode; while(cur) { if(!cur->name || cur->type != XML_ELEMENT_NODE) { cur = cur->next; continue; } /* check namespace of this tag */ if(cur->ns) { if((cur->ns->href && (nsh = (NsHandler *)g_hash_table_lookup(ns_pie_ns_uri_table, (gpointer)cur->ns->href))) || (cur->ns->prefix && (nsh = (NsHandler *)g_hash_table_lookup(pie_nstable, (gpointer)cur->ns->prefix)))) { pf = nsh->parseChannelTag; if(pf) (*pf)(ctxt, cur); cur = cur->next; continue; } else { /*g_print("unsupported namespace \"%s\"\n", cur->ns->prefix);*/ } } /* explicitly no following else !!! */ if(!xmlStrcmp(cur->name, BAD_CAST"title")) { tmp = unhtmlize(pie_parse_content_construct(cur)); if(tmp) { if(ctxt->title) g_free(ctxt->title); ctxt->title = tmp; } } else if(!xmlStrcmp(cur->name, BAD_CAST"link")) { tmp = xml_get_attribute (cur, "href"); if(tmp) { /* 0.3 link : rel, type and href attribute */ tmp2 = xml_get_attribute (cur, "rel"); if(tmp2 && g_str_equal(tmp2, "alternate")) subscription_set_homepage (ctxt->subscription, tmp); /* else FIXME: Maybe do something with other links? */ g_free(tmp2); g_free(tmp); } else { /* 0.2 link : element content is the link, or non-alternate link in 0.3 */ tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1); if(tmp) { subscription_set_homepage (ctxt->subscription, tmp); g_free(tmp); } } /* parse feed author */ } else if(!xmlStrcmp(cur->name, BAD_CAST"author")) { /* parse feed author */ tmp = parseAuthor(cur); if(tmp) { ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "author", tmp); g_free(tmp); } } else if (!xmlStrcmp (cur->name, BAD_CAST"tagline")) { tmp = pie_parse_content_construct (cur); if (tmp) { metadata_list_set (&ctxt->subscription->metadata, "description", tmp); g_free (tmp); } } else if(!xmlStrcmp(cur->name, BAD_CAST"generator")) { tmp = unhtmlize((gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1)); if(tmp && tmp[0] != '\0') { tmp2 = xml_get_attribute (cur, "version"); if(tmp2) { tmp3 = g_strdup_printf("%s %s", tmp, tmp2); g_free(tmp); g_free(tmp2); tmp = tmp3; } tmp2 = xml_get_attribute (cur, "url"); if(tmp2) { tmp3 = g_strdup_printf("<a href=\"%s\">%s</a>", tmp2, tmp); g_free(tmp2); g_free(tmp); tmp = tmp3; } ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "feedgenerator", tmp); } g_free(tmp); } else if(!xmlStrcmp(cur->name, BAD_CAST"copyright")) { tmp = pie_parse_content_construct(cur); if(tmp) { ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "copyright", tmp); g_free(tmp); } } else if(!xmlStrcmp(cur->name, BAD_CAST"modified")) { /* Modified was last used in IETF draft 02) */ tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1); if(tmp) { ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "pubDate", tmp); ctxt->feed->time = date_parse_ISO8601 (tmp); g_free(tmp); } } else if(!xmlStrcmp(cur->name, BAD_CAST"updated")) { /* Updated was added in IETF draft 03 */ tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1); if(tmp) { ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "pubDate", tmp); ctxt->feed->time = date_parse_ISO8601 (tmp); g_free(tmp); } } else if(!xmlStrcmp(cur->name, BAD_CAST"contributor")) { tmp = parseAuthor(cur); if(tmp) { ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "contributor", tmp); g_free(tmp); } } else if((!xmlStrcmp(cur->name, BAD_CAST"entry"))) { ctxt->item = parseEntry(ctxt, cur); if(ctxt->item) { if(0 == ctxt->item->time) ctxt->item->time = ctxt->feed->time; ctxt->items = g_list_append(ctxt->items, ctxt->item); } } cur = cur->next; } break; } }
/* method to parse standard tags for each item element */ itemPtr parseEntry(feedParserCtxtPtr ctxt, xmlNodePtr cur) { xmlChar *xtmp; gchar *tmp2, *tmp; NsHandler *nsh; parseItemTagFunc pf; g_assert(NULL != cur); ctxt->item = item_new(); cur = cur->xmlChildrenNode; while(cur) { if(!cur->name) { g_warning("invalid XML: parser returns NULL value -> tag ignored!"); cur = cur->next; continue; } /* check namespace of this tag */ if(cur->ns) { if((cur->ns->href && (nsh = (NsHandler *)g_hash_table_lookup(ns_pie_ns_uri_table, (gpointer)cur->ns->href))) || (cur->ns->prefix && (nsh = (NsHandler *)g_hash_table_lookup(pie_nstable, (gpointer)cur->ns->prefix)))) { if(NULL != (pf = nsh->parseItemTag)) (*pf)(ctxt, cur); cur = cur->next; continue; } else { /*g_print("unsupported namespace \"%s\"\n", cur->ns->prefix);*/ } } /* explicitly no following else !!! */ if(!xmlStrcmp(cur->name, BAD_CAST"title")) { if(NULL != (tmp = unhtmlize(pie_parse_content_construct(cur)))) { item_set_title(ctxt->item, tmp); g_free(tmp); } } else if(!xmlStrcmp(cur->name, BAD_CAST"link")) { if(NULL != (tmp2 = xml_get_attribute(cur, "href"))) { /* 0.3 link : rel, type and href attribute */ xtmp = xmlGetProp(cur, BAD_CAST"rel"); if(xtmp != NULL && !xmlStrcmp(xtmp, BAD_CAST"alternate")) item_set_source(ctxt->item, tmp2); /* else FIXME: Maybe do something with other links? */ xmlFree(xtmp); g_free(tmp2); } else { /* 0.2 link : element content is the link, or non-alternate link in 0.3 */ if(NULL != (tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1))) { item_set_source(ctxt->item, tmp); g_free(tmp); } } } else if(!xmlStrcmp(cur->name, BAD_CAST"author")) { /* parse feed author */ tmp = parseAuthor(cur); ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "author", tmp); g_free(tmp); } else if(!xmlStrcmp(cur->name, BAD_CAST"contributor")) { /* parse feed contributors */ tmp = parseAuthor(cur); ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "contributor", tmp); g_free(tmp); } else if(!xmlStrcmp(cur->name, BAD_CAST"id")) { if(NULL != (tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1))) { item_set_id(ctxt->item, tmp); g_free(tmp); } } else if(!xmlStrcmp(cur->name, BAD_CAST"issued")) { /* FIXME: is <modified> or <issued> or <created> the time tag we want to display? */ if(NULL != (tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1))) { ctxt->item->time = date_parse_ISO8601 (tmp); g_free(tmp); } } else if(!xmlStrcmp(cur->name, BAD_CAST"content")) { /* <content> support */ if(NULL != (tmp = pie_parse_content_construct(cur))) { item_set_description(ctxt->item, tmp); g_free(tmp); } } else if(!xmlStrcmp(cur->name, BAD_CAST"summary")) { /* <summary> can be used for short text descriptions, if there is no <content> description we show the <summary> content */ if(!item_get_description(ctxt->item)) { if(NULL != (tmp = pie_parse_content_construct(cur))) { item_set_description(ctxt->item, tmp); g_free(tmp); } } } else if(!xmlStrcmp(cur->name, BAD_CAST"copyright")) { if(NULL != (tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1))) { ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "copyright", tmp); g_free(tmp); } } cur = cur->next; } /* after parsing we fill the infos into the itemPtr structure */ ctxt->item->readStatus = FALSE; return ctxt->item; }