/**
 * Generic tag parsing (used for RSS and Atom).
 *
 * Item-level "date" and "title" tags get special treatment: "date" is
 * parsed as ISO 8601 into the item time, "title" becomes the item title.
 * Every other tag is compared against taglist[]; on the first match its
 * text content is appended to the subscription or item metadata under the
 * key stored at the same index of mapToFeedMetadata[] respectively
 * mapToItemMetadata[], unless the content is blank or no key is mapped.
 *
 * @param ctxt		feed parser context (its item/subscription is updated)
 * @param cur		XML node of the tag to process
 * @param isFeedTag	TRUE for a feed-level tag, FALSE for an item-level tag
 */
static void
parse_tag (feedParserCtxtPtr ctxt, xmlNodePtr cur, gboolean isFeedTag)
{
	int		i;
	gchar		*date, *value, *tmp;
	const gchar	*mapping;
	gboolean	isNotEmpty;

	if (!isFeedTag) {
		/* special handling for the ISO 8601 date item tags */
		if (!xmlStrcmp (BAD_CAST "date", cur->name)) {
			if (NULL != (date = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1))) {
				/* assign directly so the result is not narrowed through an int */
				ctxt->item->time = date_parse_ISO8601 (date);
				g_free (date);
			}
			return;
		}

		/* special handling for item titles */
		if (!xmlStrcmp (BAD_CAST "title", cur->name)) {
			value = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
			if (value) {
				item_set_title (ctxt->item, value);
				g_free (value);
			}
			return;
		}
	}

	/* compare with each possible tag name */
	for (i = 0; taglist[i] != NULL; i++) {
		if (xmlStrcmp ((const xmlChar *)taglist[i], cur->name))
			continue;

		value = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
		if (value) {
			/* Check if the value consists of whitespace only. Each UTF-8
			   sequence is decoded before testing it: feeding a single
			   byte to g_unichar_isspace() misclassifies multi-byte
			   characters. Invalid sequences decode to a non-space value
			   and therefore count as content. */
			isNotEmpty = FALSE;
			for (tmp = value; *tmp != '\0'; tmp = g_utf8_next_char (tmp)) {
				if (!g_unichar_isspace (g_utf8_get_char_validated (tmp, -1))) {
					isNotEmpty = TRUE;
					break;
				}
			}

			if (isNotEmpty) {
				if (isFeedTag) {
					if (NULL != (mapping = mapToFeedMetadata[i]))
						ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, mapping, value);
				} else {
					if (NULL != (mapping = mapToItemMetadata[i]))
						ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, mapping, value);
				}
			}
			g_free (value);
		}
		/* only the first matching tag name is processed */
		return;
	}
}
/**
 * Merge one remote TheOldReader subscription into the local feed list.
 *
 * When a node with the given URL already exists below the source root only
 * its folder is updated. Otherwise a new feed node is created, tagged with
 * the remote feed id, stored, attached to the folder (or the source root
 * when no folder is given) and scheduled for an update.
 *
 * @param source	the TheOldReader source the feed belongs to
 * @param url		feed URL used to look up or create the node
 * @param title		title for a newly created node
 * @param id		remote feed id, saved as "theoldreader-feed-id" metadata
 * @param folder	target folder node or NULL for the source root
 */
static void
theoldreader_source_merge_feed (TheOldReaderSourcePtr source, const gchar *url, const gchar *title, const gchar *id, nodePtr folder)
{
	nodePtr	existing, created, parent;

	existing = feedlist_find_node (source->root, NODE_BY_URL, url);
	if (existing) {
		node_source_update_folder (existing, folder);
		return;
	}

	debug2 (DEBUG_UPDATE, "adding %s (%s)", title, url);

	created = node_new (feed_get_node_type ());
	node_set_title (created, title);
	node_set_data (created, feed_new ());

	node_set_subscription (created, subscription_new (url, NULL, NULL));
	created->subscription->type = source->root->source->type->feedSubscriptionType;

	/* Save TheOldReader feed id which we need to fetch items... */
	created->subscription->metadata = metadata_list_append (created->subscription->metadata, "theoldreader-feed-id", id);
	db_subscription_update (created->subscription);

	parent = folder ? folder : source->root;
	node_set_parent (created, parent, -1);
	feedlist_node_imported (created);

	/**
	 * @todo mark the ones as read immediately after this is done
	 * the feed as retrieved by this has the read and unread
	 * status inherently.
	 */
	subscription_update (created->subscription, FEED_REQ_RESET_TITLE | FEED_REQ_PRIORITY_HIGH);
	subscription_update_favicon (created->subscription);
}
/**
 * Parse the Atom feed <generator> element into "feedgenerator" metadata.
 *
 * The element text is taken as generator name. A "version" attribute, if
 * present, is appended to the name; a "uri" attribute, if present, turns
 * the result into an escaped HTML link. Empty generator names are ignored.
 *
 * @param cur	the <generator> XML node
 * @param ctxt	feed parser context whose subscription metadata is extended
 * @param ip	not used by this handler
 * @param state	not used by this handler
 */
static void
atom10_parse_feed_generator (xmlNodePtr cur, feedParserCtxtPtr ctxt, itemPtr ip, struct atom10ParserState *state)
{
	gchar	*ret, *version, *tmp, *uri;

	ret = unhtmlize ((gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1));
	if (ret && ret[0] != '\0') {
		version = xml_get_ns_attribute (cur, "version", NULL);
		if (version) {
			tmp = g_strdup_printf ("%s %s", ret, version);
			g_free (ret);
			g_free (version);
			ret = tmp;
		}

		uri = xml_get_ns_attribute (cur, "uri", NULL);
		if (uri) {
			tmp = g_markup_printf_escaped ("<a href=\"%s\">%s</a>", uri, ret);
			g_free (uri);
			g_free (ret);
			ret = tmp;
		}

		/* Always store "ret": it holds the final string in every case,
		   including when neither version nor uri is given. */
		ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "feedgenerator", ret);
	}
	g_free (ret);
}
/**
 * Parse a feed-level Atom <contributor> element into "contributor"
 * subscription metadata.
 *
 * @param cur	the <contributor> XML node
 * @param ctxt	feed parser context whose subscription metadata is extended
 * @param ip	not used by this handler
 * @param state	not used by this handler
 */
static void
atom10_parse_feed_contributor (xmlNodePtr cur, feedParserCtxtPtr ctxt, itemPtr ip, struct atom10ParserState *state)
{
	gchar *contributor = atom10_parse_person_construct (cur);

	/* Same guard as the entry-level contributor handler: do not append
	   anything when no person string could be constructed. */
	if (!contributor)
		return;

	ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "contributor", contributor);
	g_free (contributor);
}
/**
 * Parse a feed-level Atom <author> element into "author" subscription
 * metadata.
 *
 * @param cur	the <author> XML node
 * @param ctxt	feed parser context whose subscription metadata is extended
 * @param ip	not used by this handler
 * @param state	not used by this handler
 */
static void
atom10_parse_feed_author (xmlNodePtr cur, feedParserCtxtPtr ctxt, itemPtr ip, struct atom10ParserState *state)
{
	gchar *author = atom10_parse_person_construct (cur);

	/* Same guard as the entry-level person handlers: do not append
	   anything when no person string could be constructed. */
	if (!author)
		return;

	ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "author", author);
	g_free (author);
	/* FIXME: make item parsing use this author if not specified elsewhere */
}
/**
 * Append a key/value pair loaded from the DB to a metadata list, but only
 * if the key is a registered metadata type. Unregistered keys are logged
 * and skipped.
 *
 * @param metadata	the metadata list to extend
 * @param key		metadata type name
 * @param value		metadata value
 *
 * @returns the (possibly extended) metadata list
 */
static GSList *
db_metadata_list_append (GSList *metadata, const char *key, const char *value)
{
	if (!metadata_is_type_registered (key)) {
		debug1 (DEBUG_DB, "Trying to load unregistered metadata type %s from DB.", key);
		return metadata;
	}

	return metadata_list_append (metadata, key, value);
}
/**
 * Parse an entry-level Atom <contributor> element into "contributor"
 * item metadata. Nothing is added when no person could be constructed.
 *
 * @param cur	the <contributor> XML node
 * @param ctxt	feed parser context whose current item is updated
 * @param state	not used by this handler
 */
static void
atom10_parse_entry_contributor (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar *person = atom10_parse_person_construct (cur);

	if (!person)
		return;

	ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "contributor", person);
	g_free (person);
}
/**
 * Parse the Atom feed <rights> element into "copyright" subscription
 * metadata. Nothing is added when the text construct yields no result.
 *
 * @param cur	the <rights> XML node
 * @param ctxt	feed parser context whose subscription metadata is extended
 * @param state	not used by this handler
 */
static void
atom10_parse_feed_rights (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar *copyright = atom10_parse_text_construct (cur, FALSE);

	if (!copyright)
		return;

	ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "copyright", copyright);
	g_free (copyright);
}
/**
 * Parse the Atom feed <updated> element: the raw timestamp is stored as
 * "contentUpdateDate" subscription metadata and its ISO 8601 parse result
 * becomes the feed time.
 *
 * @param cur	the <updated> XML node
 * @param ctxt	feed parser context (subscription metadata and feed time)
 * @param state	not used by this handler
 */
static void
atom10_parse_feed_updated (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar *updated = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);

	if (!updated)
		return;

	ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "contentUpdateDate", updated);
	ctxt->feed->time = date_parse_ISO8601 (updated);
	g_free (updated);
}
/**
 * Parse the Atom entry <published> element: its ISO 8601 parse result
 * becomes the item time and the raw string is stored as "pubDate" item
 * metadata.
 *
 * @param cur	the <published> XML node
 * @param ctxt	feed parser context whose current item is updated
 * @param state	not used by this handler
 */
static void
atom10_parse_entry_published (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar *published = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);

	if (!published)
		return;

	ctxt->item->time = date_parse_ISO8601 (published);
	ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "pubDate", published);
	g_free (published);
}
/**
 * Parse the Atom feed <icon> element into "icon" subscription metadata.
 *
 * @param cur	the <icon> XML node
 * @param ctxt	feed parser context whose subscription metadata is extended
 * @param state	not used by this handler
 */
static void
atom10_parse_feed_icon (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar *icon_uri;

	icon_uri = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
	if (!icon_uri)
		return;

	debug1 (DEBUG_PARSING, "icon URI found in atom feed: %s", icon_uri);
	ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "icon", icon_uri);

	/* The string returned by libxml2 is owned by us. Release it after
	   appending, as the other Atom handlers do (it was leaked before). */
	g_free (icon_uri);
}
/**
 * Parse an iTunes namespace tag of a feed item.
 *
 *  - "author" text is appended as "author" item metadata
 *  - "summary" is extracted as XHTML and set as item description
 *  - "keywords" is split at commas; each keyword is stripped of leading
 *    whitespace and appended as "category" item metadata
 *
 * All other tag names are ignored.
 *
 * @param ctxt	feed parser context whose current item is updated
 * @param cur	the XML node of the iTunes tag
 */
static void
ns_itunes_parse_item_tag (feedParserCtxtPtr ctxt, xmlNodePtr cur)
{
	gchar *tmp;

	if (!xmlStrcmp (cur->name, BAD_CAST"author")) {
		tmp = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
		if (tmp) {
			ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "author", tmp);
			g_free (tmp);
		}
	} else if (!xmlStrcmp (cur->name, BAD_CAST"summary")) {
		tmp = xhtml_extract (cur, 0, NULL);
		item_set_description (ctxt->item, tmp);
		g_free (tmp);
	} else if (!xmlStrcmp (cur->name, BAD_CAST"keywords")) {
		gchar *allocated = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
		gchar *keyword = allocated;

		/* parse comma separated list and strip leading spaces... */
		while (keyword) {
			/* terminate the current keyword in place and remember
			   where the next one starts (NULL if this is the last) */
			gchar *next = strchr (keyword, ',');
			if (next) {
				*next = '\0';
				next++;
			}

			/* Decode each UTF-8 sequence before the whitespace test;
			   passing a raw byte to g_unichar_isspace() is wrong for
			   multi-byte characters. The terminating NUL and invalid
			   sequences are not whitespace, so the loop stops there. */
			while (g_unichar_isspace (g_utf8_get_char_validated (keyword, -1)))
				keyword = g_utf8_next_char (keyword);

			/* skip empty keywords, e.g. from "a,,b" or a trailing comma */
			if (*keyword != '\0')
				ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "category", keyword);

			keyword = next;
		}
		g_free (allocated);
	}
}
/**
 * Parse the Atom entry <updated> element. Unless the item already has
 * "pubDate" metadata, the ISO 8601 parse result becomes the item time and
 * the raw string is stored as "contentUpdateDate" item metadata.
 *
 * @param cur	the <updated> XML node
 * @param ctxt	feed parser context whose current item is updated
 * @param state	not used by this handler
 */
static void
atom10_parse_entry_updated (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar *updated = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);

	if (!updated)
		return;

	/* if pubDate is already set, don't overwrite it */
	if (!metadata_list_get (ctxt->item->metadata, "pubDate")) {
		ctxt->item->time = date_parse_ISO8601 (updated);
		ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "contentUpdateDate", updated);
	}

	g_free (updated);
}
/**
 * Parse a feed-level Atom <category> element. The "label" attribute, or
 * the "term" attribute when there is no label, is markup-escaped and
 * appended as "category" subscription metadata.
 *
 * @param cur	the <category> XML node
 * @param ctxt	feed parser context whose subscription metadata is extended
 * @param ip	not used by this handler
 * @param state	not used by this handler
 */
static void
atom10_parse_feed_category (xmlNodePtr cur, feedParserCtxtPtr ctxt, itemPtr ip, struct atom10ParserState *state)
{
	gchar	*name, *escapedName;

	name = xml_get_ns_attribute (cur, "label", NULL);
	if (!name)
		name = xml_get_ns_attribute (cur, "term", NULL);
	if (!name)
		return;

	escapedName = g_markup_escape_text (name, -1);
	ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "category", escapedName);
	g_free (escapedName);
	xmlFree (name);
}
/**
 * Parse an entry-level Atom <category> element. The "label" attribute, or
 * the "term" attribute when there is no label, is markup-escaped and
 * appended as "category" item metadata.
 *
 * @param cur	the <category> XML node
 * @param ctxt	feed parser context whose current item is updated
 * @param state	not used by this handler
 */
static void
atom10_parse_entry_category (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar	*name, *escapedName;

	name = xml_get_ns_attribute (cur, "label", NULL);
	if (!name)
		name = xml_get_ns_attribute (cur, "term", NULL);
	if (!name)
		return;

	escapedName = g_markup_escape_text (name, -1);
	ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "category", escapedName);
	g_free (escapedName);
	xmlFree (name);
}
/**
 * Build a copy of a metadata list by re-appending every value of every
 * key/value-list pair to a fresh list.
 *
 * @param list	the metadata list to copy (a list of struct pair)
 *
 * @returns the newly built list, NULL if nothing was appended
 */
GSList *
metadata_list_copy (GSList *list)
{
	GSList	*result = NULL;
	GSList	*pairIter, *valueIter;

	for (pairIter = list; pairIter; pairIter = pairIter->next) {
		struct pair *entry = (struct pair *)pairIter->data;

		for (valueIter = entry->data; valueIter; valueIter = valueIter->next)
			result = metadata_list_append (result, entry->strid, valueIter->data);
	}

	return result;
}
/**
 * Parse an item-level "about" tag into "related" item metadata. The value
 * is taken from the "about" attribute or, if there is none, from the
 * element content. Any other tag is ignored.
 *
 * @param ctxt	feed parser context whose current item is updated
 * @param cur	the XML node of the tag
 */
static void
parse_item_tag (feedParserCtxtPtr ctxt, xmlNodePtr cur)
{
	gchar *related;

	/* We ignore the "ping" tag (and everything else that is not "about") */
	if (0 != xmlStrcmp (cur->name, BAD_CAST"about"))
		return;

	/* RSS 1.0 */
	related = xml_get_attribute (cur, "about");

	/* RSS 2.0 */
	if (!related)
		related = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);

	if (!related)
		return;

	ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "related", related);
	g_free (related);
}
/**
 * Merge one remote Inoreader subscription into the local feed list.
 *
 * Nothing happens when a direct child of the source root already has the
 * given URL as subscription source. Otherwise a new feed node is created,
 * tagged with the remote feed id, stored, attached to the source root and
 * scheduled for an update.
 *
 * @param source	the Inoreader source the feed belongs to
 * @param url		feed URL used for the duplicate check and the new node
 * @param title		title for a newly created node
 * @param id		remote feed id, saved as "inoreader-feed-id" metadata
 */
static void
inoreader_source_merge_feed (InoreaderSourcePtr source, const gchar *url, const gchar *title, const gchar *id)
{
	nodePtr	created;
	GSList	*child;

	/* check if node to be merged already exists */
	for (child = source->root->children; child; child = g_slist_next (child)) {
		nodePtr candidate = (nodePtr)child->data;

		if (g_str_equal (candidate->subscription->source, url))
			return;
	}

	debug2 (DEBUG_UPDATE, "adding %s (%s)", title, url);

	created = node_new (feed_get_node_type ());
	node_set_title (created, title);
	node_set_data (created, feed_new ());

	node_set_subscription (created, subscription_new (url, NULL, NULL));
	created->subscription->type = &inoreaderSourceFeedSubscriptionType;

	/* Save Inoreader feed id which we need to fetch items... */
	created->subscription->metadata = metadata_list_append (created->subscription->metadata, "inoreader-feed-id", id);
	db_subscription_update (created->subscription);

	node_set_parent (created, source->root, -1);
	feedlist_node_imported (created);

	/**
	 * @todo mark the ones as read immediately after this is done
	 * the feed as retrieved by this has the read and unread
	 * status inherently.
	 */
	subscription_update (created->subscription, FEED_REQ_RESET_TITLE | FEED_REQ_PRIORITY_HIGH);
	subscription_update_favicon (created->subscription);
}
/**
 * Parse the standard tags of a CDF item element.
 *
 * A new item is created in ctxt->item. The element's "href"/"HREF"
 * attribute becomes the item source. For each child element:
 *  - a name found (lower-cased) in CDFToMetadataMapping has its text
 *    appended as item metadata under the mapped key
 *  - "logo" (href attribute) is appended as "imageUrl" metadata
 *  - "title" sets the item title, "abstract" the item description
 *  - "a" (href attribute) overrides the item source
 * Element names are matched case-insensitively.
 *
 * @param ctxt	feed parser context; ctxt->item is replaced by the new item
 * @param cur	the item XML node
 * @param cp	CDF channel (not used by this function)
 *
 * @returns the newly created item (same pointer as ctxt->item)
 */
itemPtr
parseCDFItem (feedParserCtxtPtr ctxt, xmlNodePtr cur, CDFChannelPtr cp)
{
	xmlNodePtr	child;
	gchar		*link, *lowered, *key, *text;

	/* set up the tag name -> metadata key table on first use */
	if (NULL == CDFToMetadataMapping) {
		CDFToMetadataMapping = g_hash_table_new (g_str_hash, g_str_equal);
		g_hash_table_insert (CDFToMetadataMapping, "author", "author");
		g_hash_table_insert (CDFToMetadataMapping, "category", "category");
	}

	ctxt->item = item_new ();

	/* save the item link */
	link = (gchar *)xmlGetProp (cur, BAD_CAST"href");
	if (!link)
		link = (gchar *)xmlGetProp (cur, BAD_CAST"HREF");
	if (link) {
		item_set_source (ctxt->item, link);
		g_free (link);
	}

	for (child = cur->xmlChildrenNode; child; child = child->next) {
		if (!child->name || child->type != XML_ELEMENT_NODE)
			continue;

		/* generic metadata tags, looked up by lower-cased tag name */
		lowered = g_ascii_strdown ((gchar *)child->name, -1);
		if (NULL != lowered) {
			key = g_hash_table_lookup (CDFToMetadataMapping, lowered);
			if (NULL != key) {
				text = (gchar *)xmlNodeListGetString (child->doc, child->xmlChildrenNode, TRUE);
				if (NULL != text) {
					ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, key, text);
					g_free (text);
				}
			}
			g_free (lowered);
		}

		if (!xmlStrcasecmp (child->name, BAD_CAST"logo")) {
			/* save link to an item image */
			link = (gchar *)xmlGetProp (child, BAD_CAST"href");
			if (!link)
				link = (gchar *)xmlGetProp (child, BAD_CAST"HREF");
			if (link) {
				ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "imageUrl", link);
				g_free (link);
			}
		} else if (!xmlStrcasecmp (child->name, BAD_CAST"title")) {
			text = unhtmlize ((gchar *)xmlNodeListGetString (child->doc, child->xmlChildrenNode, 1));
			if (NULL != text) {
				item_set_title (ctxt->item, text);
				g_free (text);
			}
		} else if (!xmlStrcasecmp (child->name, BAD_CAST"abstract")) {
			text = (gchar *)xmlNodeListGetString (child->doc, child->xmlChildrenNode, 1);
			if (NULL != text) {
				item_set_description (ctxt->item, text);
				g_free (text);
			}
		} else if (!xmlStrcasecmp (child->name, BAD_CAST"a")) {
			link = (gchar *)xmlGetProp (child, BAD_CAST"href");
			if (!link)
				link = (gchar *)xmlGetProp (child, BAD_CAST"HREF");
			if (link) {
				item_set_source (ctxt->item, link);
				g_free (link);
			}
		}
	}

	ctxt->item->readStatus = FALSE;
	return ctxt->item;
}
/**
 * Parse an Atom <link> element.
 *
 * Note: this function is called for both item and feed context.
 *
 * The "href" attribute is resolved against the node's XML base or, when
 * there is none, against the subscription homepage if that looks like an
 * absolute URL. Depending on the "rel" attribute:
 *  - missing or "alternate": the URL is returned to the caller
 *  - "replies" (no type or Atom type): sets "commentFeedUri" item metadata
 *  - "enclosure": appends "enclosure" item metadata and flags the item
 *  - "related" / "via": appends item metadata named after the relation
 * Item metadata is only touched when ctxt->item is set. Other relations
 * are ignored.
 *
 * @param cur	the <link> XML node
 * @param ctxt	feed parser context
 * @param state	not used by this function
 *
 * @returns a newly allocated copy of the alternate URL, or NULL
 */
static gchar *
atom10_parse_link (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar		*href, *alternate = NULL;
	gchar		*url, *relation, *type, *title, *escTitle = NULL;
	xmlChar		*baseURL;
	const gchar	*feedURL;

	href = xml_get_ns_attribute (cur, "href", NULL);
	if (!href) {
		/* FIXME: @href is required, this document is not valid Atom */
		return NULL;
	}

	baseURL = xmlNodeGetBase (cur->doc, cur);
	feedURL = subscription_get_homepage (ctxt->subscription);

	/* fall back to the homepage as base if it looks like an absolute URL */
	if (!baseURL && feedURL && feedURL[0] != '|' && strstr (feedURL, "://"))
		baseURL = xmlStrdup (BAD_CAST (feedURL));
	url = (gchar *)common_build_url (href, (gchar *)baseURL);

	type = xml_get_ns_attribute (cur, "type", NULL);
	relation = xml_get_ns_attribute (cur, "rel", NULL);
	title = xml_get_ns_attribute (cur, "title", NULL);
	if (title)
		escTitle = g_markup_escape_text (title, -1);

	if (!xmlHasNsProp (cur, BAD_CAST"rel", NULL) || !relation || g_str_equal (relation, BAD_CAST"alternate")) {
		alternate = g_strdup (url);
	} else if (g_str_equal (relation, "replies")) {
		if (!type || g_str_equal (type, BAD_CAST"application/atom+xml")) {
			gchar *commentUri = (gchar *)common_build_url ((gchar *)url, subscription_get_homepage (ctxt->subscription));

			if (ctxt->item)
				metadata_list_set (&ctxt->item->metadata, "commentFeedUri", commentUri);
			g_free (commentUri);
		}
	} else if (g_str_equal (relation, "enclosure")) {
		if (ctxt->item) {
			gsize	length = 0;
			gchar	*lengthStr, *encStr;

			lengthStr = xml_get_ns_attribute (cur, "length", NULL);
			if (lengthStr)
				length = atol (lengthStr);
			g_free (lengthStr);

			encStr = enclosure_values_to_string (url, type, length, FALSE /* not yet downloaded */);
			ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "enclosure", encStr);
			ctxt->item->hasEnclosure = TRUE;
			g_free (encStr);
		}
	} else if (g_str_equal (relation, "related") || g_str_equal (relation, "via")) {
		if (ctxt->item)
			ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, relation, url);
	}
	/* any other relation is silently ignored:
	   g_warning ("Unhandled Atom link with unexpected relation \"%s\"\n", relation); */

	xmlFree (title);
	xmlFree (baseURL);
	g_free (escTitle);
	g_free (url);
	g_free (relation);
	g_free (type);
	g_free (href);

	return alternate;
}
/**
 * Parse the standard tags of a CDF channel element.
 *
 * For each child element (names are matched case-insensitively):
 *  - "logo": its HREF/href attribute is set as "imageUrl" metadata
 *  - "a": its HREF/href attribute is set as subscription homepage
 *  - "item": parsed with parseCDFItem() and appended to ctxt->items; items
 *    without a time inherit the channel time
 *  - "title": becomes the feed title
 *  - "abstract": is set as "description" metadata
 *  - anything else: looked up (lower-cased) in channelHash and, if mapped,
 *    appended as subscription metadata under the mapped key
 *
 * @param ctxt	feed parser context to fill
 * @param cur	the channel XML node
 * @param cp	CDF channel providing the default item time
 */
static void
parseCDFChannel (feedParserCtxtPtr ctxt, xmlNodePtr cur, CDFChannelPtr cp)
{
	gchar	*tmp, *tmp2, *tmp3;

	cur = cur->xmlChildrenNode;
	while (cur) {
		if (!cur->name || cur->type != XML_ELEMENT_NODE) {
			cur = cur->next;
			continue;
		}

		if (!xmlStrcasecmp (cur->name, BAD_CAST"logo")) {
			/* Accept either attribute spelling. The former code only
			   looked up "href" when "HREF" had been found, leaking the
			   first value and possibly storing NULL. */
			xmlChar *value = xmlGetProp (cur, BAD_CAST"HREF");
			if (!value)
				value = xmlGetProp (cur, BAD_CAST"href");
			if (value) {
				metadata_list_set (&ctxt->subscription->metadata, "imageUrl", (gchar *)value);
				xmlFree (value);
			}

		} else if (!xmlStrcasecmp (cur->name, BAD_CAST"a")) {
			/* same attribute fallback as used for item links */
			xmlChar *value = xmlGetProp (cur, BAD_CAST"HREF");
			if (!value)
				value = xmlGetProp (cur, BAD_CAST"href");
			if (value) {
				subscription_set_homepage (ctxt->subscription, (gchar *)value);
				xmlFree (value);
			}

		} else if (!xmlStrcasecmp (cur->name, BAD_CAST"item")) {
			ctxt->item = parseCDFItem (ctxt, cur, cp);
			if (ctxt->item) {
				/* items without own time inherit the channel time */
				if (0 == ctxt->item->time)
					ctxt->item->time = cp->time;
				ctxt->items = g_list_append (ctxt->items, ctxt->item);
			}

		} else if (!xmlStrcasecmp (cur->name, BAD_CAST "title")) {
			tmp = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, TRUE);
			if (tmp) {
				tmp = unhtmlize (tmp);
				g_free (ctxt->title);
				ctxt->title = tmp;
			}

		} else if (!xmlStrcasecmp (cur->name, BAD_CAST "abstract")) {
			tmp = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, TRUE);
			if (tmp) {
				metadata_list_set (&ctxt->subscription->metadata, "description", tmp);
				xmlFree (tmp);
			}

		} else {
			/* generic metadata tags, looked up by lower-cased tag name */
			tmp = g_ascii_strdown ((gchar *)cur->name, -1);
			tmp2 = g_hash_table_lookup (channelHash, tmp);
			if (tmp2) {
				tmp3 = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, TRUE);
				if (tmp3) {
					ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, tmp2, tmp3);
					g_free (tmp3);
				}
			}
			g_free (tmp);
		}

		cur = cur->next;
	}
}
/**
 * Parse a JSON document and convert the elements of its items array into
 * feed items.
 *
 * For each array element a new item is created and filled from the fields
 * named in the mapping (id, title, link, updated, read, flag, description
 * and author). If a callback is given it is invoked for every element
 * after the item was appended to the result list.
 *
 * @param json		the JSON document as string
 * @param root		path of the node holding the items array
 * @param mapping	field names and options describing the JSON schema
 * @param callback	optional per-item callback (may be NULL)
 *
 * @returns a list of newly created items, NULL when the document could
 *          not be parsed or holds no items
 */
GList *
json_api_get_items (const gchar *json, const gchar *root, jsonApiMapping *mapping, jsonApiItemCallbackFunc callback)
{
	GList		*items = NULL;
	JsonParser	*parser = json_parser_new ();

	if (json_parser_load_from_data (parser, json, -1, NULL)) {
		/* guard against a missing root node before asking for its array */
		JsonNode	*rootNode = json_get_node (json_parser_get_root (parser), root);
		JsonArray	*array = rootNode ? json_node_get_array (rootNode) : NULL;
		GList		*elements = array ? json_array_get_elements (array) : NULL;
		GList		*iter;

		if (array)
			debug1 (DEBUG_PARSING, "JSON API: found items root node \"%s\"", root);

		for (iter = elements; iter; iter = g_list_next (iter)) {
			JsonNode	*node = (JsonNode *)iter->data;
			itemPtr		item = item_new ();
			const gchar	*content, *author;

			/* Parse default feeds */
			item_set_id (item, json_api_get_string (node, mapping->id));
			item_set_title (item, json_api_get_string (node, mapping->title));
			item_set_source (item, json_api_get_string (node, mapping->link));
			item->time = json_api_get_int (node, mapping->updated);
			item->readStatus = json_api_get_bool (node, mapping->read);
			item->flagStatus = json_api_get_bool (node, mapping->flag);

			if (mapping->negateRead)
				item->readStatus = !item->readStatus;

			/* Handling encoded content */
			content = json_api_get_string (node, mapping->description);
			if (mapping->xhtml) {
				gchar *xhtml = xhtml_extract_from_string (content, NULL);
				item_set_description (item, xhtml);
				xmlFree (xhtml);
			} else {
				item_set_description (item, content);
			}

			/* Optional meta data */
			author = json_api_get_string (node, mapping->author);
			if (author)
				item->metadata = metadata_list_append (item->metadata, "author", author);

			items = g_list_append (items, (gpointer)item);

			/* Allow optional item callback to process stuff */
			if (callback)
				(*callback)(node, item);
		}

		g_list_free (elements);
	} else {
		debug1 (DEBUG_PARSING, "Could not parse JSON \"%s\"", json);
	}

	/* release the parser on every path (it was leaked on parse errors) */
	g_object_unref (parser);

	return items;
}
/**
 * Parse the standard tags of a PIE/Atom 0.x entry element.
 *
 * A new item is created in ctxt->item. Child elements in a namespace with
 * a registered handler are delegated to that handler's item tag parser.
 * Otherwise title, link, author, contributor, id, issued, content,
 * summary and copyright are processed; unknown tags are ignored.
 *
 * @param ctxt	feed parser context; ctxt->item is replaced by the new item
 * @param cur	the entry XML node (must not be NULL)
 *
 * @returns the newly created item (same pointer as ctxt->item)
 */
itemPtr
parseEntry (feedParserCtxtPtr ctxt, xmlNodePtr cur)
{
	xmlChar			*xtmp;
	gchar			*tmp2, *tmp;
	NsHandler		*nsh;
	parseItemTagFunc	pf;

	g_assert (NULL != cur);

	ctxt->item = item_new ();

	for (cur = cur->xmlChildrenNode; cur; cur = cur->next) {
		if (!cur->name) {
			g_warning ("invalid XML: parser returns NULL value -> tag ignored!");
			continue;
		}

		/* check namespace of this tag */
		if (cur->ns) {
			if ((cur->ns->href && (nsh = (NsHandler *)g_hash_table_lookup (ns_pie_ns_uri_table, (gpointer)cur->ns->href))) ||
			    (cur->ns->prefix && (nsh = (NsHandler *)g_hash_table_lookup (pie_nstable, (gpointer)cur->ns->prefix)))) {
				if (NULL != (pf = nsh->parseItemTag))
					(*pf)(ctxt, cur);
				continue;
			} else {
				/*g_print("unsupported namespace \"%s\"\n", cur->ns->prefix);*/
			}
		} /* explicitly no following else !!! */

		if (!xmlStrcmp (cur->name, BAD_CAST"title")) {
			if (NULL != (tmp = unhtmlize (pie_parse_content_construct (cur)))) {
				item_set_title (ctxt->item, tmp);
				g_free (tmp);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"link")) {
			if (NULL != (tmp2 = xml_get_attribute (cur, "href"))) {
				/* 0.3 link : rel, type and href attribute */
				xtmp = xmlGetProp (cur, BAD_CAST"rel");
				if (xtmp != NULL && !xmlStrcmp (xtmp, BAD_CAST"alternate"))
					item_set_source (ctxt->item, tmp2);
				/* else
					FIXME: Maybe do something with other links? */
				xmlFree (xtmp);
				g_free (tmp2);
			} else {
				/* 0.2 link : element content is the link, or non-alternate link in 0.3 */
				if (NULL != (tmp = (gchar *)xmlNodeListGetString (ctxt->doc, cur->xmlChildrenNode, 1))) {
					item_set_source (ctxt->item, tmp);
					g_free (tmp);
				}
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"author")) {
			/* parse entry author; skipped when no author string could
			   be built, matching the feed-level handling in pie_parse() */
			tmp = parseAuthor (cur);
			if (tmp) {
				ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "author", tmp);
				g_free (tmp);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"contributor")) {
			/* parse entry contributors, guarded like the author above */
			tmp = parseAuthor (cur);
			if (tmp) {
				ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "contributor", tmp);
				g_free (tmp);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"id")) {
			if (NULL != (tmp = (gchar *)xmlNodeListGetString (ctxt->doc, cur->xmlChildrenNode, 1))) {
				item_set_id (ctxt->item, tmp);
				g_free (tmp);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"issued")) {
			/* FIXME: is <modified> or <issued> or <created> the time tag we want to display? */
			if (NULL != (tmp = (gchar *)xmlNodeListGetString (ctxt->doc, cur->xmlChildrenNode, 1))) {
				ctxt->item->time = date_parse_ISO8601 (tmp);
				g_free (tmp);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"content")) {
			/* <content> support */
			if (NULL != (tmp = pie_parse_content_construct (cur))) {
				item_set_description (ctxt->item, tmp);
				g_free (tmp);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"summary")) {
			/* <summary> can be used for short text descriptions, if there is no
			   <content> description we show the <summary> content */
			if (!item_get_description (ctxt->item)) {
				if (NULL != (tmp = pie_parse_content_construct (cur))) {
					item_set_description (ctxt->item, tmp);
					g_free (tmp);
				}
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"copyright")) {
			if (NULL != (tmp = (gchar *)xmlNodeListGetString (ctxt->doc, cur->xmlChildrenNode, 1))) {
				ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "copyright", tmp);
				g_free (tmp);
			}
		}
	}

	/* after parsing we fill the infos into the itemPtr structure */
	ctxt->item->readStatus = FALSE;

	return ctxt->item;
}
/**
 * Parse a PIE/Atom 0.x feed document into the given parser context.
 *
 * If the given node is not a "feed" element a parse error is recorded in
 * ctxt->feed->parseErrors and nothing else happens. Otherwise each child
 * element is processed: elements in a namespace with a registered handler
 * are delegated to its channel tag parser; title, link, author, tagline,
 * generator, copyright, modified, updated and contributor fill the feed
 * title, homepage and subscription metadata; each entry is parsed with
 * parseEntry() and appended to ctxt->items.
 *
 * @param ctxt	feed parser context to fill
 * @param cur	the document's root element
 */
static void
pie_parse (feedParserCtxtPtr ctxt, xmlNodePtr cur)
{
	gchar			*tmp2, *tmp = NULL, *tmp3;
	NsHandler		*nsh;
	parseChannelTagFunc	pf;

	if (xmlStrcmp (cur->name, BAD_CAST"feed")) {
		g_string_append (ctxt->feed->parseErrors, "<p>Could not find Atom/Echo/PIE header!</p>");
		return;
	}

	/* parse feed contents */
	for (cur = cur->xmlChildrenNode; cur; cur = cur->next) {
		if (!cur->name || cur->type != XML_ELEMENT_NODE)
			continue;

		/* check namespace of this tag */
		if (cur->ns) {
			if ((cur->ns->href && (nsh = (NsHandler *)g_hash_table_lookup (ns_pie_ns_uri_table, (gpointer)cur->ns->href))) ||
			    (cur->ns->prefix && (nsh = (NsHandler *)g_hash_table_lookup (pie_nstable, (gpointer)cur->ns->prefix)))) {
				pf = nsh->parseChannelTag;
				if (pf)
					(*pf)(ctxt, cur);
				continue;
			} else {
				/*g_print("unsupported namespace \"%s\"\n", cur->ns->prefix);*/
			}
		} /* explicitly no following else !!! */

		if (!xmlStrcmp (cur->name, BAD_CAST"title")) {
			tmp = unhtmlize (pie_parse_content_construct (cur));
			if (tmp) {
				g_free (ctxt->title);
				ctxt->title = tmp;
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"link")) {
			tmp = xml_get_attribute (cur, "href");
			if (tmp) {
				/* 0.3 link : rel, type and href attribute */
				tmp2 = xml_get_attribute (cur, "rel");
				if (tmp2 && g_str_equal (tmp2, "alternate"))
					subscription_set_homepage (ctxt->subscription, tmp);
				/* else
					FIXME: Maybe do something with other links? */
				g_free (tmp2);
				g_free (tmp);
			} else {
				/* 0.2 link : element content is the link, or non-alternate link in 0.3 */
				tmp = (gchar *)xmlNodeListGetString (ctxt->doc, cur->xmlChildrenNode, 1);
				if (tmp) {
					subscription_set_homepage (ctxt->subscription, tmp);
					g_free (tmp);
				}
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"author")) {
			/* parse feed author */
			tmp = parseAuthor (cur);
			if (tmp) {
				ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "author", tmp);
				g_free (tmp);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"tagline")) {
			tmp = pie_parse_content_construct (cur);
			if (tmp) {
				metadata_list_set (&ctxt->subscription->metadata, "description", tmp);
				g_free (tmp);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"generator")) {
			tmp = unhtmlize ((gchar *)xmlNodeListGetString (ctxt->doc, cur->xmlChildrenNode, 1));
			if (tmp && tmp[0] != '\0') {
				tmp2 = xml_get_attribute (cur, "version");
				if (tmp2) {
					tmp3 = g_strdup_printf ("%s %s", tmp, tmp2);
					g_free (tmp);
					g_free (tmp2);
					tmp = tmp3;
				}
				tmp2 = xml_get_attribute (cur, "url");
				if (tmp2) {
					/* URL and name come from the feed: escape them so
					   they cannot inject markup into the link */
					tmp3 = g_markup_printf_escaped ("<a href=\"%s\">%s</a>", tmp2, tmp);
					g_free (tmp2);
					g_free (tmp);
					tmp = tmp3;
				}
				ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "feedgenerator", tmp);
			}
			g_free (tmp);
		} else if (!xmlStrcmp (cur->name, BAD_CAST"copyright")) {
			tmp = pie_parse_content_construct (cur);
			if (tmp) {
				ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "copyright", tmp);
				g_free (tmp);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"modified")) {
			/* Modified was last used in IETF draft 02) */
			tmp = (gchar *)xmlNodeListGetString (ctxt->doc, cur->xmlChildrenNode, 1);
			if (tmp) {
				ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "pubDate", tmp);
				ctxt->feed->time = date_parse_ISO8601 (tmp);
				g_free (tmp);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"updated")) {
			/* Updated was added in IETF draft 03 */
			tmp = (gchar *)xmlNodeListGetString (ctxt->doc, cur->xmlChildrenNode, 1);
			if (tmp) {
				ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "pubDate", tmp);
				ctxt->feed->time = date_parse_ISO8601 (tmp);
				g_free (tmp);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"contributor")) {
			tmp = parseAuthor (cur);
			if (tmp) {
				ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "contributor", tmp);
				g_free (tmp);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"entry")) {
			ctxt->item = parseEntry (ctxt, cur);
			if (ctxt->item) {
				/* entries without own time inherit the feed time */
				if (0 == ctxt->item->time)
					ctxt->item->time = ctxt->feed->time;
				ctxt->items = g_list_append (ctxt->items, ctxt->item);
			}
		}
	}
}