/**
 * Handles a channel level tag of this namespace.
 *
 * The "resource" attribute of the element is stored in the subscription
 * metadata: as "errorReportsTo" for an <errorReportsTo> element and as
 * "feedgeneratorUri" for a <generatorAgent> element. Any other element
 * is ignored.
 *
 * @param ctxt	the feed parser context
 * @param cur	the XML element to process
 */
static void
parse_channel_tag (feedParserCtxtPtr ctxt, xmlNodePtr cur)
{
	const gchar	*metadataKey = NULL;
	gchar		*resource;

	resource = xml_get_attribute (cur, "resource");

	/* map the element name to the metadata key it is stored under */
	if (0 == xmlStrcmp (BAD_CAST "errorReportsTo", cur->name))
		metadataKey = "errorReportsTo";
	else if (0 == xmlStrcmp (BAD_CAST "generatorAgent", cur->name))
		metadataKey = "feedgeneratorUri";

	if (metadataKey)
		metadata_list_set (&(ctxt->subscription->metadata), metadataKey, resource);

	g_free (resource);
}
/**
 * Handles an item level photo tag.
 *
 * The text of <thumbnail>/<thumb> and <imgsrc> elements is remembered in the
 * item's temporary data ("photo:thumbnail" and "photo:imgsrc"); empty text
 * is discarded. Whenever a thumbnail is known the item metadata "photo" is
 * (re)written as "<thumbnail>,<imgsrc>", with an empty second part as long
 * as no imgsrc has been seen.
 *
 * @param ctxt	the feed parser context
 * @param cur	the XML element to process
 */
static void
parse_item_tag (feedParserCtxtPtr ctxt, xmlNodePtr cur)
{
	const gchar	*tmpdataKey = NULL;
	gchar		*thumbnail, *imgsrc, *photo;

	if (!xmlStrcmp (BAD_CAST"thumbnail", cur->name) ||
	    !xmlStrcmp (BAD_CAST"thumb", cur->name))
		tmpdataKey = "photo:thumbnail";
	else if (!xmlStrcmp (BAD_CAST"imgsrc", cur->name))
		tmpdataKey = "photo:imgsrc";

	if (tmpdataKey) {
		gchar *text = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);

		/* only non-empty text is handed over to the hash table */
		if (text && g_utf8_strlen (text, -1) > 0)
			g_hash_table_insert (ctxt->item->tmpdata, (gpointer)tmpdataKey, text);
		else
			g_free (text);
	}

	thumbnail = g_hash_table_lookup (ctxt->item->tmpdata, "photo:thumbnail");
	imgsrc = g_hash_table_lookup (ctxt->item->tmpdata, "photo:imgsrc");

	/* without a thumbnail there is nothing to publish yet */
	if (!thumbnail)
		return;

	photo = g_strdup_printf ("%s,%s", thumbnail, imgsrc ? imgsrc : "");
	metadata_list_set (&(ctxt->item->metadata), "photo", photo);
	g_free (photo);
}
/**
 * Sets the "homepage" metadata of a subscription.
 *
 * An URL containing "://" is treated as absolute and stored with trailing
 * whitespace removed. Anything else is treated as relative and expanded
 * with common_build_url() against the subscription source, cut off after
 * its last '/'. Passing NULL leaves the subscription untouched.
 *
 * @param subscription	the subscription to change
 * @param newHtmlUrl	the new homepage URL (may be NULL)
 */
void
subscription_set_homepage (subscriptionPtr subscription, const gchar *newHtmlUrl)
{
	gchar	*homepage;

	if (!newHtmlUrl)
		return;

	if (NULL != strstr (newHtmlUrl, "://")) {
		/* absolute URI: take it as is, just drop trailing whitespace */
		homepage = g_strdup (newHtmlUrl);
		g_strchomp (homepage);
	} else {
		/* relative URI: resolve against the directory part of the source */
		gchar	*base, *lastSlash;

		base = g_strdup (subscription_get_source (subscription));
		lastSlash = strrchr (base, '/');
		if (lastSlash)
			lastSlash[1] = '\0';

		homepage = common_build_url (newHtmlUrl, base);
		g_free (base);
	}

	metadata_list_set (&subscription->metadata, "homepage", homepage);
	g_free (homepage);
}
/**
 * Parses the feed subtitle element and stores its text construct
 * (parsed with the second argument TRUE) as the subscription's
 * "description" metadata. Nothing is stored when no text is returned.
 *
 * @param cur	the XML element to process
 * @param ctxt	the feed parser context
 * @param state	the Atom parser state (not used here)
 */
static void
atom10_parse_feed_subtitle (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar	*text = atom10_parse_text_construct (cur, TRUE);

	if (!text)
		return;

	metadata_list_set (&ctxt->subscription->metadata, "description", text);
	g_free (text);
}
/**
 * Parses the feed logo element and stores its text construct
 * (parsed with the second argument FALSE) as the subscription's
 * "imageUrl" metadata. Nothing is stored when no text is returned.
 *
 * @param cur	the XML element to process
 * @param ctxt	the feed parser context
 * @param state	the Atom parser state (not used here)
 */
static void
atom10_parse_feed_logo (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar	*imageUrl = atom10_parse_text_construct (cur, FALSE);

	if (!imageUrl)
		return;

	metadata_list_set (&ctxt->subscription->metadata, "imageUrl", imageUrl);
	g_free (imageUrl);
}
/**
 * Handles an item level comment feed tag.
 *
 * The text content of a <commentRss> or <commentRSS> element is stored
 * as the item's "commentFeedUri" metadata. Other elements and elements
 * without text are ignored.
 *
 * @param ctxt	the feed parser context
 * @param cur	the XML element to process
 */
static void
parse_item_tag (feedParserCtxtPtr ctxt, xmlNodePtr cur)
{
	gchar	*feedUri;

	/* both spellings of the element name are in use */
	if (xmlStrcmp (BAD_CAST"commentRss", cur->name) &&
	    xmlStrcmp (BAD_CAST"commentRSS", cur->name))
		return;

	feedUri = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
	if (!feedUri)
		return;

	metadata_list_set (&(ctxt->item->metadata), "commentFeedUri", feedUri);
	g_free (feedUri);
}
/**
 * Handles a channel level iTunes tag.
 *
 * For <summary> and <subtitle> elements the extracted content replaces
 * the subscription's "description" metadata when there is no description
 * yet or when the existing one is shorter than the new text.
 *
 * @param ctxt	the feed parser context
 * @param cur	the XML element to process
 */
static void
ns_itunes_parse_channel_tag (feedParserCtxtPtr ctxt, xmlNodePtr cur)
{
	gchar		*tmp;
	const gchar	*old;

	if (xmlStrcmp (cur->name, BAD_CAST"summary") &&
	    xmlStrcmp (cur->name, BAD_CAST"subtitle"))
		return;

	tmp = xhtml_extract (cur, 0, NULL);

	/* The extraction result is passed to strlen() below, which must never
	   see NULL. xhtml_extract() is not visible here, so guard defensively
	   and treat "nothing extracted" as "nothing to do". */
	if (!tmp)
		return;

	old = metadata_list_get (ctxt->subscription->metadata, "description");
	if (!old || strlen (old) < strlen (tmp))
		metadata_list_set (&ctxt->subscription->metadata, "description", tmp);

	g_free (tmp);
}
/**
 * Handles an item level aggregation tag.
 *
 * - <source> and <sourceURL>: the element text is remembered in the item's
 *   temporary data ("ag:source" / "ag:sourceURL") and the "agSource"
 *   metadata is rebuilt from what is known so far: a link when an URL is
 *   available (titled with the source name, or with the URL itself when
 *   no name is known), otherwise the plain source name.
 * - <timestamp>: the ISO 8601 text is parsed, formatted and stored as
 *   "agTimestamp" metadata.
 *
 * @param ctxt	the feed parser context
 * @param cur	the XML element to process
 */
static void
parse_item_tag (feedParserCtxtPtr ctxt, xmlNodePtr cur)
{
	gchar		*date, *source, *sourceURL, *tmp;
	gboolean	sourceTag = FALSE;

	if (!xmlStrcmp (BAD_CAST "source", cur->name)) {
		sourceTag = TRUE;
		g_hash_table_insert (ctxt->item->tmpdata, g_strdup ("ag:source"),
		                     (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1));
	} else if (!xmlStrcmp (BAD_CAST "sourceURL", cur->name)) {
		sourceTag = TRUE;
		g_hash_table_insert (ctxt->item->tmpdata, g_strdup ("ag:sourceURL"),
		                     (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1));
	}

	if (sourceTag) {
		source = g_hash_table_lookup (ctxt->item->tmpdata, "ag:source");
		sourceURL = g_hash_table_lookup (ctxt->item->tmpdata, "ag:sourceURL");

		/* Either value may be missing: the other tag may not have been
		   seen yet, or the element had no text. Never hand NULL to a
		   "%s" conversion. */
		if (source && sourceURL)
			tmp = g_strdup_printf ("<a href=\"%s\">%s</a>", sourceURL, source);
		else if (sourceURL)
			tmp = g_strdup_printf ("<a href=\"%s\">%s</a>", sourceURL, sourceURL);
		else if (source)
			tmp = g_strdup (source);
		else
			tmp = NULL;

		if (tmp) {
			metadata_list_set (&(ctxt->item->metadata), "agSource", tmp);
			/* the string is only needed for the call above, release it */
			g_free (tmp);
		}
	} else if (!xmlStrcmp (BAD_CAST "timestamp", cur->name)) {
		tmp = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
		if (tmp) {
			date = date_format (date_parse_ISO8601 (tmp), _("%b %d %H:%M"));
			metadata_list_set (&(ctxt->item->metadata), "agTimestamp", date);
			g_free (date);
			g_free (tmp);
		}
	}
}
/**
 * Merges a single remote feed into the feed list of a tt-rss source.
 *
 * When no node with the given URL exists below the source root, a new feed
 * node is created, tagged with the remote feed id, attached to the source
 * root, scheduled for an update and persisted. In every case the folder
 * assignment of the node is refreshed afterwards.
 *
 * @param source	the tt-rss source
 * @param url		the feed URL
 * @param title		the feed title
 * @param id		the tt-rss feed id
 */
static void
ttrss_source_merge_feed (ttrssSourcePtr source, const gchar *url, const gchar *title, gint64 id)
{
	nodePtr	node;

	/* Do we know this feed already? */
	node = feedlist_find_node (source->root, NODE_BY_URL, url);

	if (NULL == node) {
		gchar	*feedId;

		debug2 (DEBUG_UPDATE, "adding %s (%s)", title, url);

		node = node_new (feed_get_node_type ());
		node_set_title (node, title);
		node_set_data (node, feed_new ());

		node_set_subscription (node, subscription_new (url, NULL, NULL));
		node->subscription->type = &ttrssSourceFeedSubscriptionType;

		/* Remember the tt-rss feed id, it is required to fetch items later */
		feedId = g_strdup_printf ("%" G_GINT64_FORMAT, id);
		metadata_list_set (&node->subscription->metadata, "ttrss-feed-id", feedId);
		g_free (feedId);

		node_set_parent (node, source->root, -1);
		feedlist_node_imported (node);

		/**
		 * @todo mark the ones as read immediately after this is done
		 * the feed as retrieved by this has the read and unread
		 * status inherently.
		 */
		subscription_update (node->subscription, FEED_REQ_RESET_TITLE | FEED_REQ_PRIORITY_HIGH);
		subscription_update_favicon (node->subscription);

		/* Important: persist now, the feed id must not get lost! */
		db_subscription_update (node->subscription);
	}

	debug2 (DEBUG_UPDATE, "updating folder for %s (%s)", title, url);
	ttrss_source_update_folder (source, node);
}
/*
 * Download callback for blogChannel OPML references.
 *
 * Parses the retrieved data as an OPML document and renders all depth 1
 * <outline> tags of its <body> as HTML into a buffer. Depending on
 * requestData->tag the result is wrapped in the blogroll or the
 * "my subscriptions" markup and stored in the parser context's temporary
 * data. Finally the "blogChannel" metadata of the subscription is rebuilt
 * from all blogChannel fragments collected so far.
 *
 * The callback releases the item list, the parser context and the request
 * data, whether or not the download delivered any data.
 *
 * @param result	the download result
 * @param user_data	the struct requestData of this request (must not be NULL)
 * @param flags		request flags (not used here)
 */
static void
ns_blogChannel_download_request_cb (const struct updateResult * const result, gpointer user_data, guint32 flags)
{
	/* fragments the "blogChannel" metadata is assembled from, in output order */
	static const gchar * const fragmentKeys[] = {
		"bC:blink",
		"bC:blogRoll",
		"bC:mySubscriptions"
	};
	struct requestData	*requestData = user_data;
	xmlDocPtr		doc = NULL;
	xmlNodePtr		cur;
	GString			*buffer = NULL;
	guint			i;

	g_assert (NULL != requestData);

	if (result->data) {
		buffer = g_string_new (NULL);

		/* single pass loop, only used to be able to bail out with break */
		while (1) {
			doc = xmlRecoverMemory (result->data, result->size);

			if (NULL == doc)
				break;

			if (NULL == (cur = xmlDocGetRootElement (doc)))
				break;

			if (!xmlStrcmp (cur->name, BAD_CAST"opml") ||
			    !xmlStrcmp (cur->name, BAD_CAST"oml") ||
			    !xmlStrcmp (cur->name, BAD_CAST"outlineDocument")) {
				/* nothing */
			} else
				break;

			cur = cur->xmlChildrenNode;
			while (cur) {
				if (!xmlStrcmp (cur->name, BAD_CAST"body")) {
					/* process all <outline> tags */
					cur = cur->xmlChildrenNode;
					while (cur) {
						if (!xmlStrcmp (cur->name, BAD_CAST"outline")) {
							GString *tmp = getOutlineContents (cur);
							g_string_append_printf (buffer, "%s<br />", tmp->str);
							g_string_free (tmp, TRUE);
						}
						cur = cur->next;
					}
					break;
				}
				cur = cur->next;
			}
			break;
		}

		if (doc)
			xmlFreeDoc (doc);
	}

	if (buffer) {
		switch (requestData->tag) {
			case TAG_BLOGROLL:
				g_string_prepend (buffer, BLOGROLL_START);
				g_string_append (buffer, BLOGROLL_END);
				g_hash_table_insert (requestData->ctxt->tmpdata, g_strdup ("bC:blogRoll"), buffer->str);
				break;
			case TAG_MYSUBSCRIPTIONS:
				g_string_prepend (buffer, MYSUBSCR_START);
				g_string_append (buffer, MYSUBSCR_END);
				g_hash_table_insert (requestData->ctxt->tmpdata, g_strdup ("bC:mySubscriptions"), buffer->str);
				break;
			default:
				g_error ("wrong requestData->tag value");
				break;
		}

		/* the character data is now referenced by the hash table, only
		   release the GString wrapper */
		g_string_free (buffer, FALSE);

		buffer = g_string_new (NULL);
		for (i = 0; i < G_N_ELEMENTS (fragmentKeys); i++) {
			const gchar *fragment = g_hash_table_lookup (requestData->ctxt->tmpdata, fragmentKeys[i]);

			/* Not every fragment exists (this callback stores at most one
			   of them) and g_string_append() must not be given NULL. */
			if (fragment)
				g_string_append (buffer, fragment);
		}
		metadata_list_set (&(requestData->ctxt->subscription->metadata), "blogChannel", buffer->str);
		g_string_free (buffer, TRUE);
	}

	g_list_free (requestData->ctxt->items);
	feed_free_parser_ctxt (requestData->ctxt);
	g_free (requestData);
}
/*
 * Parses an Atom 0.2/0.3 (Echo/PIE) feed document.
 *
 * cur has to be the <feed> element; otherwise an error message is appended
 * to ctxt->feed->parseErrors and nothing else happens. All element children
 * of <feed> are processed: tags of a known namespace are passed to the
 * channel tag handler of that namespace, the core tags fill the title, the
 * subscription metadata and the feed time, and each <entry> that could be
 * parsed is appended to ctxt->items.
 *
 * Nothing is returned, all results are stored in ctxt.
 */
static void
pie_parse (feedParserCtxtPtr ctxt, xmlNodePtr cur)
{
	gchar			*text, *attr, *combined;
	NsHandler		*handler;
	parseChannelTagFunc	channelTagFunc;

	if (0 != xmlStrcmp (cur->name, BAD_CAST"feed")) {
		g_string_append (ctxt->feed->parseErrors, "<p>Could not find Atom/Echo/PIE header!</p>");
		return;
	}

	/* parse feed contents */
	for (cur = cur->xmlChildrenNode; cur; cur = cur->next) {
		if (!cur->name || XML_ELEMENT_NODE != cur->type)
			continue;

		/* check namespace of this tag: the handler is looked up by
		   namespace URI first and by namespace prefix second */
		if (cur->ns) {
			handler = NULL;
			if (cur->ns->href)
				handler = (NsHandler *)g_hash_table_lookup (ns_pie_ns_uri_table, (gpointer)cur->ns->href);
			if (!handler && cur->ns->prefix)
				handler = (NsHandler *)g_hash_table_lookup (pie_nstable, (gpointer)cur->ns->prefix);

			if (handler) {
				channelTagFunc = handler->parseChannelTag;
				if (channelTagFunc)
					(*channelTagFunc) (ctxt, cur);
				continue;
			}
			/* unsupported namespace: deliberately handled like a plain tag below */
		}

		if (!xmlStrcmp (cur->name, BAD_CAST"title")) {
			text = unhtmlize (pie_parse_content_construct (cur));
			if (text) {
				g_free (ctxt->title);
				ctxt->title = text;
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"link")) {
			text = xml_get_attribute (cur, "href");
			if (text) {
				/* 0.3 link : rel, type and href attribute */
				attr = xml_get_attribute (cur, "rel");
				if (attr && g_str_equal (attr, "alternate"))
					subscription_set_homepage (ctxt->subscription, text);
				/* FIXME: Maybe do something with other links? */
				g_free (attr);
				g_free (text);
			} else {
				/* 0.2 link : element content is the link, or non-alternate link in 0.3 */
				text = (gchar *)xmlNodeListGetString (ctxt->doc, cur->xmlChildrenNode, 1);
				if (text) {
					subscription_set_homepage (ctxt->subscription, text);
					g_free (text);
				}
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"author")) {
			/* parse feed author */
			text = parseAuthor (cur);
			if (text) {
				ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "author", text);
				g_free (text);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"tagline")) {
			text = pie_parse_content_construct (cur);
			if (text) {
				metadata_list_set (&ctxt->subscription->metadata, "description", text);
				g_free (text);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"generator")) {
			text = unhtmlize ((gchar *)xmlNodeListGetString (ctxt->doc, cur->xmlChildrenNode, 1));
			if (text && '\0' != text[0]) {
				/* append the version, if there is one */
				attr = xml_get_attribute (cur, "version");
				if (attr) {
					combined = g_strdup_printf ("%s %s", text, attr);
					g_free (text);
					g_free (attr);
					text = combined;
				}

				/* turn the generator name into a link, if there is an URL */
				attr = xml_get_attribute (cur, "url");
				if (attr) {
					combined = g_strdup_printf ("<a href=\"%s\">%s</a>", attr, text);
					g_free (attr);
					g_free (text);
					text = combined;
				}

				ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "feedgenerator", text);
			}
			g_free (text);
		} else if (!xmlStrcmp (cur->name, BAD_CAST"copyright")) {
			text = pie_parse_content_construct (cur);
			if (text) {
				ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "copyright", text);
				g_free (text);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"modified") ||
		           !xmlStrcmp (cur->name, BAD_CAST"updated")) {
			/* "modified" was last used in IETF draft 02, "updated" was
			   added in IETF draft 03; both are handled the same way */
			text = (gchar *)xmlNodeListGetString (ctxt->doc, cur->xmlChildrenNode, 1);
			if (text) {
				ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "pubDate", text);
				ctxt->feed->time = date_parse_ISO8601 (text);
				g_free (text);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"contributor")) {
			text = parseAuthor (cur);
			if (text) {
				ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "contributor", text);
				g_free (text);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"entry")) {
			ctxt->item = parseEntry (ctxt, cur);
			if (ctxt->item) {
				/* entries without own time get the feed time */
				if (0 == ctxt->item->time)
					ctxt->item->time = ctxt->feed->time;
				ctxt->items = g_list_append (ctxt->items, ctxt->item);
			}
		}
	}
}
/*
 * Parses an Atom 1.0 <link> element.
 *
 * Note: this function is called for both item and feed context. Metadata
 * is only written when ctxt->item is set.
 *
 * The href attribute is resolved with common_build_url() against the XML
 * base of the node or, when there is none, against the subscription
 * homepage, provided that is an absolute URL not starting with '|'. What
 * happens next depends on the "rel" attribute:
 *
 * - missing or "alternate": the resolved URL is returned
 * - "replies": for links without type or of type "application/atom+xml"
 *   the URL is stored as "commentFeedUri" metadata of the item
 * - "enclosure": an enclosure is added to the item
 * - "related", "via": the URL is added as item metadata of that name
 * - anything else is ignored
 *
 * Returns a newly allocated copy of the resolved URL for alternate links,
 * NULL in all other cases (including a missing href attribute).
 */
static gchar *
atom10_parse_link (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar		*href, *url, *relation, *type;
	gchar		*alternate = NULL;
	xmlChar		*baseURL;
	const gchar	*feedURL;

	href = xml_get_ns_attribute (cur, "href", NULL);
	if (!href) {
		/* FIXME: @href is required, this document is not valid Atom */
		return NULL;
	}

	baseURL = xmlNodeGetBase (cur->doc, cur);
	feedURL = subscription_get_homepage (ctxt->subscription);

	/* no xml:base: fall back to the homepage if it is usable as a base */
	if (!baseURL && feedURL && feedURL[0] != '|' && strstr (feedURL, "://"))
		baseURL = xmlStrdup (BAD_CAST (feedURL));

	url = (gchar *)common_build_url (href, (gchar *)baseURL);

	type = xml_get_ns_attribute (cur, "type", NULL);
	relation = xml_get_ns_attribute (cur, "rel", NULL);

	if (!xmlHasNsProp (cur, BAD_CAST"rel", NULL) || !relation || g_str_equal (relation, BAD_CAST"alternate")) {
		alternate = g_strdup (url);
	} else if (g_str_equal (relation, "replies")) {
		/* the comment feed URI can only be stored on an item, so do
		   not even build it in feed context */
		if (ctxt->item && (!type || g_str_equal (type, BAD_CAST"application/atom+xml"))) {
			gchar *commentUri = (gchar *)common_build_url ((gchar *)url, subscription_get_homepage (ctxt->subscription));

			metadata_list_set (&ctxt->item->metadata, "commentFeedUri", commentUri);
			g_free (commentUri);
		}
	} else if (g_str_equal (relation, "enclosure")) {
		if (ctxt->item) {
			gsize	length = 0;
			gchar	*lengthStr, *encStr;

			lengthStr = xml_get_ns_attribute (cur, "length", NULL);
			if (lengthStr)
				length = atol (lengthStr);
			g_free (lengthStr);

			encStr = enclosure_values_to_string (url, type, length, FALSE /* not yet downloaded */);
			ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "enclosure", encStr);
			ctxt->item->hasEnclosure = TRUE;
			g_free (encStr);
		}
	} else if (g_str_equal (relation, "related") || g_str_equal (relation, "via")) {
		if (ctxt->item)
			ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, relation, url);
	} else {
		/* g_warning ("Unhandled Atom link with unexpected relation \"%s\"\n", relation); */
	}

	xmlFree (baseURL);
	g_free (url);
	g_free (relation);
	g_free (type);
	g_free (href);

	return alternate;
}
/*
 * Parses the standard tags of a CDF channel element.
 *
 * All element children of cur are processed, tag names are compared
 * case-insensitively:
 *
 * - <logo>: the HREF (or, if missing, href) attribute becomes the
 *   "imageUrl" metadata of the subscription
 * - <a>: the HREF attribute becomes the subscription homepage
 * - <item>: parsed with parseCDFItem() and appended to ctxt->items, items
 *   without own time inherit cp->time
 * - <title>: the unhtmlized text replaces ctxt->title
 * - <abstract>: the text becomes the "description" metadata
 * - any other tag: if channelHash has an entry for the lower-cased tag
 *   name, the text is appended as metadata under the name found there
 */
static void
parseCDFChannel (feedParserCtxtPtr ctxt, xmlNodePtr cur, CDFChannelPtr cp)
{
	gchar	*tmp, *tmp2, *tmp3;

	for (cur = cur->xmlChildrenNode; cur; cur = cur->next) {
		if (!cur->name || cur->type != XML_ELEMENT_NODE)
			continue;

		if (!xmlStrcasecmp (cur->name, BAD_CAST"logo")) {
			/* attribute names are case sensitive: prefer the upper
			   case spelling and only fall back to lower case when
			   it is missing */
			tmp = (gchar *)xmlGetProp (cur, BAD_CAST"HREF");
			if (!tmp)
				tmp = (gchar *)xmlGetProp (cur, BAD_CAST"href");
			if (tmp) {
				metadata_list_set (&ctxt->subscription->metadata, "imageUrl", tmp);
				xmlFree (tmp);
			}
		} else if (!xmlStrcasecmp (cur->name, BAD_CAST"a")) {
			xmlChar *value = xmlGetProp (cur, BAD_CAST"HREF");
			if (value) {
				subscription_set_homepage (ctxt->subscription, (gchar *)value);
				xmlFree (value);
			}
		} else if (!xmlStrcasecmp (cur->name, BAD_CAST"item")) {
			ctxt->item = parseCDFItem (ctxt, cur, cp);
			if (ctxt->item) {
				/* items without own time inherit the channel time */
				if (0 == ctxt->item->time)
					ctxt->item->time = cp->time;
				ctxt->items = g_list_append (ctxt->items, ctxt->item);
			}
		} else if (!xmlStrcasecmp (cur->name, BAD_CAST "title")) {
			tmp = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, TRUE);
			if (tmp) {
				tmp = unhtmlize (tmp);
				g_free (ctxt->title);
				ctxt->title = tmp;
			}
		} else if (!xmlStrcasecmp (cur->name, BAD_CAST "abstract")) {
			tmp = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, TRUE);
			if (tmp) {
				metadata_list_set (&ctxt->subscription->metadata, "description", tmp);
				xmlFree (tmp);
			}
		} else {
			/* generic tags: channelHash is looked up with the lower
			   case tag name and yields the metadata name to use */
			tmp = g_ascii_strdown ((gchar *)cur->name, -1);
			tmp2 = g_hash_table_lookup (channelHash, tmp);
			if (tmp2) {
				tmp3 = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, TRUE);
				if (tmp3) {
					ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, tmp2, tmp3);
					xmlFree (tmp3);
				}
			}
			g_free (tmp);
		}
	}
}