/*
 * Generic tag parsing (used for RSS and Atom).
 *
 * ctxt       parser context; ctxt->item is written for item tags,
 *            ctxt->subscription->metadata for feed tags
 * cur        the XML element to evaluate
 * isFeedTag  TRUE if cur belongs to the feed/channel, FALSE if it
 *            belongs to an item
 *
 * For item tags <date> and <title> are handled specially (item time and
 * item title). Every other element is looked up in taglist[]; on a match
 * its text content is appended to the feed or item metadata under the
 * key found at the same index in mapToFeedMetadata[] / mapToItemMetadata[].
 * Values consisting only of whitespace are dropped.
 */
static void
parse_tag (feedParserCtxtPtr ctxt, xmlNodePtr cur, gboolean isFeedTag)
{
	int		i;
	gchar		*date, *value, *tmp;
	const gchar	*mapping;
	gboolean	isNotEmpty;

	if (!isFeedTag) {
		/* special handling for the ISO 8601 date item tags */
		if (!xmlStrcmp (BAD_CAST "date", cur->name)) {
			if (NULL != (date = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1))) {
				/* assign directly instead of going through an int,
				   which could narrow the parsed timestamp */
				ctxt->item->time = date_parse_ISO8601 (date);
				g_free (date);
			}
			return;
		}

		/* special handling for item titles */
		if (!xmlStrcmp (BAD_CAST "title", cur->name)) {
			value = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
			if (value) {
				item_set_title (ctxt->item, value);
				g_free (value);
			}
			return;
		}
	}

	/* compare with each possible tag name */
	for (i = 0; taglist[i] != NULL; i++) {
		if (xmlStrcmp ((const xmlChar *)taglist[i], cur->name))
			continue;

		value = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
		if (value) {
			/* Check if the value consists of whitespace only. Each
			   UTF-8 sequence has to be decoded before classifying it:
			   passing the raw lead byte would never recognise
			   non-ASCII whitespace. Walking up to the terminator also
			   avoids recomputing the string length on every step. */
			isNotEmpty = FALSE;
			for (tmp = value; *tmp != '\0'; tmp = g_utf8_next_char (tmp)) {
				if (!g_unichar_isspace (g_utf8_get_char (tmp))) {
					isNotEmpty = TRUE;
					break;
				}
			}

			if (isNotEmpty) {
				if (isFeedTag) {
					if (NULL != (mapping = mapToFeedMetadata[i]))
						ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, mapping, value);
				} else {
					if (NULL != (mapping = mapToItemMetadata[i]))
						ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, mapping, value);
				}
			}
			g_free (value);
		}

		/* only the first matching tag name is evaluated */
		return;
	}
}
/*
 * Entry-level <title> handler: extracts the element as a text construct
 * (second argument FALSE, as passed by the original code) and, if a
 * string was obtained, stores it as the title of the item currently
 * being parsed. The state parameter is not used here.
 */
static void
atom10_parse_entry_title (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar *text = atom10_parse_text_construct (cur, FALSE);

	if (!text)
		return;

	item_set_title (ctxt->item, text);
	g_free (text);
}
/*
 * Builds a list of items from a JSON document.
 *
 * json      the JSON text to parse
 * root      name/path of the node holding the item array (resolved
 *           with json_get_node() relative to the document root)
 * mapping   names of the JSON fields to read id, title, link, updated,
 *           read, flag, description and author from; negateRead inverts
 *           the read flag, xhtml requests XHTML extraction of the
 *           description
 * callback  optional function invoked for each JSON node together with
 *           the item created from it (may be NULL)
 *
 * Returns a GList of the items created with item_new(), or NULL if the
 * JSON could not be parsed or no elements were found.
 */
GList *
json_api_get_items (const gchar *json, const gchar *root, jsonApiMapping *mapping, jsonApiItemCallbackFunc callback)
{
	GList		*items = NULL;
	JsonParser	*parser = json_parser_new ();

	if (json_parser_load_from_data (parser, json, -1, NULL)) {
		JsonArray	*array = json_node_get_array (json_get_node (json_parser_get_root (parser), root));
		GList		*elements = json_array_get_elements (array);
		GList		*iter = elements;

		debug1 (DEBUG_PARSING, "JSON API: found items root node \"%s\"", root);

		while (iter) {
			JsonNode	*node = (JsonNode *)iter->data;
			itemPtr		item = item_new ();
			const gchar	*content;
			const gchar	*author;
			gchar		*xhtml;

			/* Parse default fields */
			item_set_id (item, json_api_get_string (node, mapping->id));
			item_set_title (item, json_api_get_string (node, mapping->title));
			item_set_source (item, json_api_get_string (node, mapping->link));
			item->time = json_api_get_int (node, mapping->updated);
			item->readStatus = json_api_get_bool (node, mapping->read);
			item->flagStatus = json_api_get_bool (node, mapping->flag);

			/* some APIs report "unread" instead of "read" */
			if (mapping->negateRead)
				item->readStatus = !item->readStatus;

			/* Handling encoded content */
			content = json_api_get_string (node, mapping->description);
			if (mapping->xhtml) {
				xhtml = xhtml_extract_from_string (content, NULL);
				item_set_description (item, xhtml);
				xmlFree (xhtml);
			} else {
				item_set_description (item, content);
			}

			/* Optional meta data */
			author = json_api_get_string (node, mapping->author);
			if (author)
				item->metadata = metadata_list_append (item->metadata, "author", author);

			items = g_list_append (items, (gpointer)item);

			/* Allow optional item callback to process stuff */
			if (callback)
				(*callback)(node, item);

			iter = g_list_next (iter);
		}

		/* only the list container is freed here; the nodes stay
		   with the parser until it is released below */
		g_list_free (elements);
	} else {
		debug1 (DEBUG_PARSING, "Could not parse JSON \"%s\"", json);
	}

	/* release the parser on every path, it used to leak when
	   parsing failed */
	g_object_unref (parser);

	return items;
}
/*
 * Processes the result of a TinyTinyRSS headline request for one feed
 * subscription.
 *
 * On HTTP status 200 with a body the JSON is parsed, one item is created
 * per element of the "content" array and the resulting list is merged
 * into the item set of the subscription's node. The node is flagged as
 * available when the JSON could be parsed and as unavailable otherwise
 * (no data, non-200 status or unparsable JSON; in the last case a parse
 * error message is appended to the feed). The flags parameter is not
 * used here.
 */
static void
ttrss_feed_subscription_process_update_result (subscriptionPtr subscription, const struct updateResult* const result, updateFlags flags)
{
	JsonParser	*parser;
	JsonArray	*array;
	GList		*elements, *entry;
	GList		*items = NULL;

	if (!result->data || result->httpstatus != 200) {
		subscription->node->available = FALSE;
		return;
	}

	parser = json_parser_new ();

	if (!json_parser_load_from_data (parser, result->data, -1, NULL)) {
		subscription->node->available = FALSE;
		g_string_append (((feedPtr)subscription->node->data)->parseErrors, _("Could not parse JSON returned by TinyTinyRSS API!"));
		g_object_unref (parser);
		return;
	}

	array = json_node_get_array (json_get_node (json_parser_get_root (parser), "content"));
	elements = json_array_get_elements (array);

	/*
	   The expected payload is an array of headline objects, e.g.

	   [ {"id":118,
	      "unread":true,
	      "marked":false,
	      "updated":1287927675,
	      "is_updated":false,
	      "title":"IBM Says New ...",
	      "link":"http://rss.slashdot.org/...",
	      "feed_id":"5",
	      "content":"coondoggie writes ..."
	     },
	     {"id":117,
	      "unread":true,
	      "marked":false,
	      "updated":1287923814,
	      [...]
	 */
	for (entry = elements; entry != NULL; entry = g_list_next (entry)) {
		JsonNode	*node = (JsonNode *)entry->data;
		itemPtr		item = item_new ();
		gchar		*idString, *xhtml;
		const gchar	*content;

		/* the numeric id is stored in its decimal string form */
		idString = g_strdup_printf ("%" G_GINT64_FORMAT, json_get_int (node, "id"));
		item_set_id (item, idString);
		g_free (idString);

		item_set_title (item, json_get_string (node, "title"));
		item_set_source (item, json_get_string (node, "link"));

		content = json_get_string (node, "content");
		xhtml = xhtml_extract_from_string (content, NULL);
		item_set_description (item, xhtml);
		xmlFree (xhtml);

		item->time = json_get_int (node, "updated");

		/* the API reports "unread", the item stores "read" */
		item->readStatus = !json_get_bool (node, "unread");

		/* the flag is only ever raised here, never cleared */
		if (json_get_bool (node, "marked"))
			item->flagStatus = TRUE;

		items = g_list_append (items, (gpointer)item);
	}

	g_list_free (elements);

	/* merge against feed cache */
	if (items) {
		itemSetPtr	itemSet = node_get_itemset (subscription->node);
		gint		newCount;

		newCount = itemset_merge_items (itemSet, items, TRUE /* feed valid */, FALSE /* markAsRead */);
		itemlist_merge_itemset (itemSet);
		itemset_free (itemSet);

		feedlist_node_was_updated (subscription->node, newCount);
	}

	subscription->node->available = TRUE;

	g_object_unref (parser);
}
/*
 * Parses the child elements of one entry element into a new item.
 *
 * A fresh item is created and stored in ctxt->item. Children that live
 * in a namespace with a registered handler (looked up by namespace URI
 * first, then by prefix) are passed to that handler's parseItemTag
 * callback, if it has one, and are not evaluated any further. All other
 * children are matched by element name: title, link, author,
 * contributor, id, issued, content, summary and copyright.
 *
 * Returns ctxt->item with its read status set to FALSE.
 */
itemPtr
parseEntry (feedParserCtxtPtr ctxt, xmlNodePtr cur)
{
	xmlNodePtr		child;
	NsHandler		*handler;
	parseItemTagFunc	itemTagFunc;
	xmlChar			*rel;
	gchar			*href, *text;

	g_assert (NULL != cur);

	ctxt->item = item_new ();

	for (child = cur->xmlChildrenNode; child != NULL; child = child->next) {
		if (NULL == child->name) {
			g_warning ("invalid XML: parser returns NULL value -> tag ignored!");
			continue;
		}

		/* namespaced tags: try the URI table first, then the prefix table */
		if (child->ns) {
			handler = NULL;
			if (child->ns->href)
				handler = (NsHandler *)g_hash_table_lookup (ns_pie_ns_uri_table, (gpointer)child->ns->href);
			if (!handler && child->ns->prefix)
				handler = (NsHandler *)g_hash_table_lookup (pie_nstable, (gpointer)child->ns->prefix);

			if (handler) {
				itemTagFunc = handler->parseItemTag;
				if (itemTagFunc != NULL)
					(*itemTagFunc)(ctxt, child);
				continue;
			}
			/* unsupported namespace: fall through to the name matching below */
		}

		if (0 == xmlStrcmp (child->name, BAD_CAST"title")) {
			text = unhtmlize (pie_parse_content_construct (child));
			if (text != NULL) {
				item_set_title (ctxt->item, text);
				g_free (text);
			}
		} else if (0 == xmlStrcmp (child->name, BAD_CAST"link")) {
			href = xml_get_attribute (child, "href");
			if (href != NULL) {
				/* 0.3 link : rel, type and href attribute,
				   only the "alternate" link becomes the item source */
				rel = xmlGetProp (child, BAD_CAST"rel");
				if (rel != NULL && 0 == xmlStrcmp (rel, BAD_CAST"alternate"))
					item_set_source (ctxt->item, href);
				/* else FIXME: Maybe do something with other links? */
				xmlFree (rel);
				g_free (href);
			} else {
				/* 0.2 link : element content is the link, or non-alternate link in 0.3 */
				text = (gchar *)xmlNodeListGetString (ctxt->doc, child->xmlChildrenNode, 1);
				if (text != NULL) {
					item_set_source (ctxt->item, text);
					g_free (text);
				}
			}
		} else if (0 == xmlStrcmp (child->name, BAD_CAST"author")) {
			/* author of this entry */
			text = parseAuthor (child);
			ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "author", text);
			g_free (text);
		} else if (0 == xmlStrcmp (child->name, BAD_CAST"contributor")) {
			/* contributor of this entry */
			text = parseAuthor (child);
			ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "contributor", text);
			g_free (text);
		} else if (0 == xmlStrcmp (child->name, BAD_CAST"id")) {
			text = (gchar *)xmlNodeListGetString (ctxt->doc, child->xmlChildrenNode, 1);
			if (text != NULL) {
				item_set_id (ctxt->item, text);
				g_free (text);
			}
		} else if (0 == xmlStrcmp (child->name, BAD_CAST"issued")) {
			/* FIXME: is <modified> or <issued> or <created> the time tag we want to display? */
			text = (gchar *)xmlNodeListGetString (ctxt->doc, child->xmlChildrenNode, 1);
			if (text != NULL) {
				ctxt->item->time = date_parse_ISO8601 (text);
				g_free (text);
			}
		} else if (0 == xmlStrcmp (child->name, BAD_CAST"content")) {
			/* <content> support */
			text = pie_parse_content_construct (child);
			if (text != NULL) {
				item_set_description (ctxt->item, text);
				g_free (text);
			}
		} else if (0 == xmlStrcmp (child->name, BAD_CAST"summary")) {
			/* <summary> holds a short text description; it is only used
			   as long as the item has no description yet */
			if (!item_get_description (ctxt->item)) {
				text = pie_parse_content_construct (child);
				if (text != NULL) {
					item_set_description (ctxt->item, text);
					g_free (text);
				}
			}
		} else if (0 == xmlStrcmp (child->name, BAD_CAST"copyright")) {
			text = (gchar *)xmlNodeListGetString (ctxt->doc, child->xmlChildrenNode, 1);
			if (text != NULL) {
				ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "copyright", text);
				g_free (text);
			}
		}
	}

	/* freshly parsed items always start out unread */
	ctxt->item->readStatus = FALSE;

	return ctxt->item;
}
/**
 * itemset_generic_merge_check: (skip)
 * @items:		existing items
 * @newItem:		new item to merge
 * @maxChecks:		maximum number of item checks (currently not
 *			evaluated by this function)
 * @allowUpdates:	TRUE if item content update is to be
 *      		allowed for existing items
 * @allowStateChanges:	TRUE if item state shall be
 *			overwritten by source
 *
 * Generic merge logic suitable for feeds.
 *
 * The new item is compared against the existing items until a
 * counterpart is found: items having the same id, or - when neither
 * item has an id - items whose titles and descriptions do not differ.
 * If a counterpart with differing content (or, with @allowStateChanges,
 * differing read/flag state) is found and @allowUpdates is set, the
 * existing item takes over title, description, time and metadata of
 * @newItem and is written to the DB. Description and metadata are moved,
 * so @newItem no longer owns them afterwards.
 *
 * Returns: TRUE if no counterpart was found, i.e. the new item has to
 *          be added; FALSE if it matched an existing item
 */
static gboolean
itemset_generic_merge_check (GList *items, itemPtr newItem, gint maxChecks, gboolean allowUpdates, gboolean allowStateChanges)
{
	GList		*oldItemIdIter = items;
	itemPtr		oldItem = NULL;
	gboolean	found, equal = FALSE;
	guint		reason = 0;

	/* determine if we should add it... */
	debug3 (DEBUG_CACHE, "check new item for merging: \"%s\", %i, %i", item_get_title (newItem), allowUpdates, allowStateChanges);

	/* compare to every existing item in this feed */
	found = FALSE;
	while (oldItemIdIter) {
		oldItem = (itemPtr)(oldItemIdIter->data);

		/* The reason bits describe the differences to the current
		   candidate only. Without this reset the value logged below
		   would also contain the bits of every item compared before. */
		reason = 0;

		/* try to compare the two items */

		/* trivial case: one item has id the other doesn't -> they can't be equal */
		if (((item_get_id (oldItem) == NULL) && (item_get_id (newItem) != NULL)) ||
		    ((item_get_id (oldItem) != NULL) && (item_get_id (newItem) == NULL))) {
			/* cannot be equal (different ids) so compare to
			   next old item */
			oldItemIdIter = g_list_next (oldItemIdIter);
			continue;
		}

		/* just for the case there are no ids: compare titles and HTML descriptions */
		equal = TRUE;

		if (((item_get_title (oldItem) != NULL) && (item_get_title (newItem) != NULL)) &&
		     (0 != strcmp (item_get_title (oldItem), item_get_title (newItem)))) {
			equal = FALSE;
			reason |= 1;
		}

		if (((item_get_description (oldItem) != NULL) && (item_get_description (newItem) != NULL)) &&
		     (0 != strcmp (item_get_description (oldItem), item_get_description (newItem)))) {
			equal = FALSE;
			reason |= 2;
		}

		/* best case: they both have ids (position important: id check
		   is useless without knowing if the items are different!) */
		if (item_get_id (oldItem)) {
			if (0 == strcmp (item_get_id (oldItem), item_get_id (newItem))) {
				found = TRUE;

				if (allowStateChanges) {
					/* found corresponding item, check if they are REALLY equal (eg, read status may have changed) */
					if (oldItem->readStatus != newItem->readStatus) {
						equal = FALSE;
						reason |= 4;
					}
					if (oldItem->flagStatus != newItem->flagStatus) {
						equal = FALSE;
						reason |= 8;
					}
				}
				break;
			} else {
				/* different ids, but the content might be still equal (e.g. empty)
				   so we need to explicitly unset the equal flag !!!  */
				equal = FALSE;
				reason |= 16;
			}
		}

		if (equal) {
			found = TRUE;
			break;
		}

		oldItemIdIter = g_list_next (oldItemIdIter);
	}

	if (!found) {
		debug0 (DEBUG_CACHE, "-> item is to be added");
	} else {
		/* if the item was found but has other contents -> update contents */
		if (!equal) {
			if (allowUpdates) {
				/* no item_set_new_status() - we don't treat changed items as new items! */
				item_set_title (oldItem, item_get_title (newItem));

				/* don't use item_set_description as it does some unwanted length handling
				   and we want to enforce the new description */
				g_free (oldItem->description);
				oldItem->description = newItem->description;
				newItem->description = NULL;

				oldItem->time = newItem->time;
				oldItem->updateStatus = TRUE;

				// FIXME: this does not remove metadata from DB
				metadata_list_free (oldItem->metadata);
				oldItem->metadata = newItem->metadata;
				newItem->metadata = NULL;

				/* Only update item state for feed sources where it is necessary
				   which means online accounts we sync against, but not normal
				   online feeds where items have no read status. */
				if (allowStateChanges) {
					/* To avoid notification spam from external
					   sources: never set read items to unread again! */
					if ((!oldItem->readStatus) && (newItem->readStatus))
						oldItem->readStatus = newItem->readStatus;

					oldItem->flagStatus = newItem->flagStatus;
				}

				db_item_update (oldItem);
				debug1 (DEBUG_CACHE, "-> item already existing and was updated, reason %x", reason);
			} else {
				debug0 (DEBUG_CACHE, "-> item updates not merged because of parser errors");
			}
		} else {
			debug0 (DEBUG_CACHE, "-> item already exists");
		}
	}

	return !found;
}
/*
 * Parses one CDF item element into a new item.
 *
 * A fresh item is created and stored in ctxt->item. The item source is
 * taken from the element's href/HREF attribute and may be replaced by the
 * href/HREF of a child <a> element. Child elements are matched case
 * insensitively: author and category go to the item metadata, <logo>
 * provides the "imageUrl" metadata, <title> the title and <abstract> the
 * description. The cp parameter is not used here.
 *
 * Returns ctxt->item with its read status set to FALSE.
 */
itemPtr
parseCDFItem (feedParserCtxtPtr ctxt, xmlNodePtr cur, CDFChannelPtr cp)
{
	xmlNodePtr	child;
	gchar		*value = NULL;
	gchar		*lowered, *metaKey, *text;

	/* the tag name -> metadata key table is created on first use */
	if (NULL == CDFToMetadataMapping) {
		CDFToMetadataMapping = g_hash_table_new (g_str_hash, g_str_equal);
		g_hash_table_insert (CDFToMetadataMapping, "author", "author");
		g_hash_table_insert (CDFToMetadataMapping, "category", "category");
	}

	ctxt->item = item_new ();

	/* save the item link, the attribute may be spelled in either case */
	value = (gchar *)xmlGetProp (cur, BAD_CAST"href");
	if (!value)
		value = (gchar *)xmlGetProp (cur, BAD_CAST"HREF");
	if (value) {
		item_set_source (ctxt->item, value);
		g_free (value);
	}

	for (child = cur->xmlChildrenNode; child != NULL; child = child->next) {
		if (!child->name || child->type != XML_ELEMENT_NODE)
			continue;

		/* tags listed in the mapping table are stored as metadata,
		   the lookup uses the lower-cased tag name */
		lowered = g_ascii_strdown ((gchar *)child->name, -1);
		if (lowered != NULL) {
			metaKey = g_hash_table_lookup (CDFToMetadataMapping, lowered);
			if (metaKey != NULL) {
				text = (gchar *)xmlNodeListGetString (child->doc, child->xmlChildrenNode, TRUE);
				if (text != NULL) {
					ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, metaKey, text);
					g_free (text);
				}
			}
			g_free (lowered);
		}

		if (0 == xmlStrcasecmp (child->name, BAD_CAST"logo")) {
			/* link to an image for this item */
			value = (gchar *)xmlGetProp (child, BAD_CAST"href");
			if (!value)
				value = (gchar *)xmlGetProp (child, BAD_CAST"HREF");
			if (value) {
				ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "imageUrl", value);
				g_free (value);
			}
		} else if (0 == xmlStrcasecmp (child->name, BAD_CAST"title")) {
			value = unhtmlize ((gchar *)xmlNodeListGetString (child->doc, child->xmlChildrenNode, 1));
			if (value != NULL) {
				item_set_title (ctxt->item, value);
				g_free (value);
			}
		} else if (0 == xmlStrcasecmp (child->name, BAD_CAST"abstract")) {
			value = (gchar *)xmlNodeListGetString (child->doc, child->xmlChildrenNode, 1);
			if (value != NULL) {
				item_set_description (ctxt->item, value);
				g_free (value);
			}
		} else if (0 == xmlStrcasecmp (child->name, BAD_CAST"a")) {
			/* an anchor child overrides the item link set above */
			value = (gchar *)xmlGetProp (child, BAD_CAST"href");
			if (!value)
				value = (gchar *)xmlGetProp (child, BAD_CAST"HREF");
			if (value) {
				item_set_source (ctxt->item, value);
				g_free (value);
			}
		}
	}

	/* freshly parsed items always start out unread */
	ctxt->item->readStatus = FALSE;

	return ctxt->item;
}