Beispiel #1
0
/**
 * parse_tag:
 * @ctxt:	feed parser context; its item (item tags) or subscription
 *		(feed tags) receives the parsed values
 * @cur:	the XML element to parse
 * @isFeedTag:	TRUE if @cur is a feed level tag, FALSE for an item level tag
 *
 * Generic tag parsing (used for RSS and Atom).
 *
 * For item tags "date" (ISO 8601) and "title" are handled specially and
 * set the item time / title. Every other tag is looked up in taglist[]
 * and, if its text content is not purely whitespace, appended to the
 * feed or item metadata using the key found at the same index in
 * mapToFeedMetadata[] / mapToItemMetadata[]. Tags without a mapping are
 * silently ignored.
 */
static void
parse_tag (feedParserCtxtPtr ctxt, xmlNodePtr cur, gboolean isFeedTag)
{
	int		i;
	gchar		*value;
	const gchar	*mapping, *pos;
	gboolean	isNotEmpty;

	if (!isFeedTag) {
		/* special handling for the ISO 8601 date item tags */
		if (!xmlStrcmp (BAD_CAST "date", cur->name)) {
			gchar *date = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
			if (date) {
				/* assign directly: do not squeeze the timestamp through an int */
				ctxt->item->time = date_parse_ISO8601 (date);
				g_free (date);
			}
			return;
		}

		/* special handling for item titles */
		if (!xmlStrcmp (BAD_CAST "title", cur->name)) {
			value = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
			if (value) {
				item_set_title (ctxt->item, value);
				g_free (value);
			}
			return;
		}
	}

	/* compare with each possible tag name */
	for (i = 0; taglist[i] != NULL; i++) {
		if (xmlStrcmp ((const xmlChar *)taglist[i], cur->name))
			continue;

		value = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
		if (value) {
			/* Check if value consists of whitespace only. Each step
			   decodes a full UTF-8 character; testing the raw byte
			   would never recognise multi-byte space characters. */
			isNotEmpty = FALSE;
			for (pos = value; *pos != '\0'; pos = g_utf8_next_char (pos)) {
				if (!g_unichar_isspace (g_utf8_get_char (pos))) {
					isNotEmpty = TRUE;
					break;
				}
			}

			if (isNotEmpty) {
				if (isFeedTag) {
					if (NULL != (mapping = mapToFeedMetadata[i]))
						ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, mapping, value);
				} else {
					if (NULL != (mapping = mapToItemMetadata[i]))
						ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, mapping, value);
				}
			}
			g_free (value);
		}

		/* only the first matching tag name is processed */
		return;
	}
}
Beispiel #2
0
/* Parses the text construct of an entry's title element and stores the
   result as title of the item currently being parsed. Nothing is changed
   if no text could be extracted. The state parameter is not used here. */
static void
atom10_parse_entry_title (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar *text = atom10_parse_text_construct (cur, FALSE);

	if (!text)
		return;

	item_set_title (ctxt->item, text);
	g_free (text);
}
Beispiel #3
0
/**
 * json_api_get_items:
 * @json:	the JSON document to parse
 * @root:	key name of the node holding the array of items
 * @mapping:	names of the JSON fields to read the item properties from
 * @callback:	optional function called for each JSON node / item pair
 *		(may be NULL)
 *
 * Parses @json and creates one item per element of the array found at
 * @root. Id, title, source, time, read/flag status, description and the
 * optional "author" metadata are filled according to @mapping.
 *
 * Returns: a newly allocated list of items in document order (to be
 * freed by the caller), or NULL if the JSON could not be parsed or
 * contained no items.
 */
GList *
json_api_get_items (const gchar *json, const gchar *root, jsonApiMapping *mapping, jsonApiItemCallbackFunc callback)
{
	GList		*items = NULL;
	JsonParser	*parser = json_parser_new ();

	if (json_parser_load_from_data (parser, json, -1, NULL)) {
		JsonNode	*rootNode = json_get_node (json_parser_get_root (parser), root);
		GList		*elements = NULL;
		GList		*iter;

		/* Only ask for the array if the node really is one, otherwise
		   json-glib emits critical warnings. */
		if (rootNode && JSON_NODE_HOLDS_ARRAY (rootNode)) {
			debug1 (DEBUG_PARSING, "JSON API: found items root node \"%s\"", root);
			elements = json_array_get_elements (json_node_get_array (rootNode));
		} else {
			debug1 (DEBUG_PARSING, "JSON API: items root node \"%s\" is missing or not an array", root);
		}

		for (iter = elements; iter; iter = g_list_next (iter)) {
			JsonNode	*node = (JsonNode *)iter->data;
			itemPtr		item = item_new ();
			const gchar	*content, *author;

			/* Parse default fields */
			item_set_id	(item, json_api_get_string (node, mapping->id));
			item_set_title	(item, json_api_get_string (node, mapping->title));
			item_set_source	(item, json_api_get_string (node, mapping->link));

			item->time       = json_api_get_int (node, mapping->updated);
			item->readStatus = json_api_get_bool (node, mapping->read);
			item->flagStatus = json_api_get_bool (node, mapping->flag);

			if (mapping->negateRead)
				item->readStatus = !item->readStatus;

			/* Handling encoded content. The string lookup may yield
			   NULL (see the author check below), in which case no
			   XHTML extraction is attempted. */
			content = json_api_get_string (node, mapping->description);
			if (mapping->xhtml && content) {
				gchar *xhtml = xhtml_extract_from_string (content, NULL);
				item_set_description (item, xhtml);
				xmlFree (xhtml);
			} else {
				item_set_description (item, content);
			}

			/* Optional meta data */
			author = json_api_get_string (node, mapping->author);
			if (author)
				item->metadata = metadata_list_append (item->metadata, "author", author);

			/* prepend now, reverse once after the loop (avoids O(n^2) appends) */
			items = g_list_prepend (items, (gpointer)item);

			/* Allow optional item callback to process stuff */
			if (callback)
				(*callback)(node, item);
		}

		items = g_list_reverse (items);
		g_list_free (elements);
	} else {
		debug1 (DEBUG_PARSING, "Could not parse JSON \"%s\"", json);
	}

	/* release the parser on success and on parse failure */
	g_object_unref (parser);

	return items;
}
Beispiel #4
0
/*
 * Processes the result of a TinyTinyRSS headline request for a single
 * feed subscription.
 *
 * On a HTTP 200 response with parseable JSON each element of the
 * "content" array is converted to an item (id, title, link, content,
 * update time, read and flag state), the items are merged against the
 * feed cache and the node is marked available. If the JSON cannot be
 * parsed a parse error is recorded on the feed; in this case and for any
 * other HTTP status or missing data the node is marked unavailable.
 *
 * The flags parameter is not used here.
 */
static void
ttrss_feed_subscription_process_update_result (subscriptionPtr subscription, const struct updateResult* const result, updateFlags flags)
{
	if (result->data && result->httpstatus == 200) {
		JsonParser	*parser = json_parser_new ();

		if (json_parser_load_from_data (parser, result->data, -1, NULL)) {
			JsonNode	*contentNode = json_get_node (json_parser_get_root (parser), "content");
			GList		*elements = NULL;
			GList		*iter;
			GList		*items = NULL;

			/*
			   We expect to get something like this

			   [{"id":118,
			     "unread":true,
			     "marked":false,
			     "updated":1287927675,
			     "is_updated":false,
			     "title":"IBM Says New ...",
			     "link":"http:\/\/rss.slashdot.org\/~r\/Slashdot\/slashdot\/~3\/ALuhNKO3NV4\/story01.htm",
			     "feed_id":"5",
			     "content":"coondoggie writes ..."
			    },
			    {"id":117,
			     "unread":true,
			     "marked":false,
			     "updated":1287923814,
			   [...]
			 */

			/* Only ask for the array if the node really is one, otherwise
			   json-glib emits critical warnings.
			   NOTE(review): as before a missing array still leaves the
			   node marked as available - confirm this is intended. */
			if (contentNode && JSON_NODE_HOLDS_ARRAY (contentNode))
				elements = json_array_get_elements (json_node_get_array (contentNode));

			for (iter = elements; iter; iter = g_list_next (iter)) {
				JsonNode	*node = (JsonNode *)iter->data;
				itemPtr		item = item_new ();
				const gchar	*content;
				gchar		*id;

				id = g_strdup_printf ("%" G_GINT64_FORMAT, json_get_int (node, "id"));
				item_set_id (item, id);
				g_free (id);
				item_set_title (item, json_get_string (node, "title"));
				item_set_source (item, json_get_string (node, "link"));

				/* skip XHTML extraction when the article carries no content;
				   presumably json_get_string() yields NULL for a missing key */
				content = json_get_string (node, "content");
				if (content) {
					gchar *xhtml = xhtml_extract_from_string (content, NULL);
					item_set_description (item, xhtml);
					xmlFree (xhtml);
				}

				item->time = json_get_int (node, "updated");

				/* "unread" is the inverse of our read status */
				item->readStatus = json_get_bool (node, "unread") ? FALSE : TRUE;

				if (json_get_bool (node, "marked"))
					item->flagStatus = TRUE;

				/* prepend now, reverse once after the loop (avoids O(n^2) appends) */
				items = g_list_prepend (items, (gpointer)item);
			}

			items = g_list_reverse (items);
			g_list_free (elements);

			/* merge against feed cache */
			if (items) {
				itemSetPtr itemSet = node_get_itemset (subscription->node);
				gint newCount = itemset_merge_items (itemSet, items, TRUE /* feed valid */, FALSE /* markAsRead */);
				itemlist_merge_itemset (itemSet);
				itemset_free (itemSet);

				feedlist_node_was_updated (subscription->node, newCount);
			}

			subscription->node->available = TRUE;
		} else {
			subscription->node->available = FALSE;

			g_string_append (((feedPtr)subscription->node->data)->parseErrors, _("Could not parse JSON returned by TinyTinyRSS API!"));
		}

		g_object_unref (parser);
	} else {
		subscription->node->available = FALSE;
	}
}
Beispiel #5
0
/* Parses the standard tags of a single entry element.

   A new item is created in ctxt->item and filled from the child elements
   of cur. Children in a namespace with a registered handler are passed to
   that handler's item tag function (if it has one) and are not processed
   any further. All other children are matched by name against the known
   entry tags; unknown tags are ignored.

   Returns the new item, which is always marked as unread. */
itemPtr parseEntry(feedParserCtxtPtr ctxt, xmlNodePtr cur) {
	xmlNodePtr	child;

	g_assert(NULL != cur);

	ctxt->item = item_new();

	for(child = cur->xmlChildrenNode; child; child = child->next) {
		NsHandler	*handler = NULL;
		gchar		*text;

		if(!child->name) {
			g_warning("invalid XML: parser returns NULL value -> tag ignored!");
			continue;
		}

		/* check namespace of this tag: look up a handler by namespace
		   URI first and by namespace prefix second */
		if(child->ns) {
			if(child->ns->href)
				handler = (NsHandler *)g_hash_table_lookup(ns_pie_ns_uri_table, (gpointer)child->ns->href);
			if(!handler && child->ns->prefix)
				handler = (NsHandler *)g_hash_table_lookup(pie_nstable, (gpointer)child->ns->prefix);

			if(handler) {
				parseItemTagFunc parseFunc = handler->parseItemTag;

				if(NULL != parseFunc)
					(*parseFunc)(ctxt, child);
				continue;
			}
			/* unsupported namespace: explicitly fall through to the
			   standard tag handling below */
		}

		if(!xmlStrcmp(child->name, BAD_CAST"title")) {
			text = unhtmlize(pie_parse_content_construct(child));
			if(NULL != text) {
				item_set_title(ctxt->item, text);
				g_free(text);
			}
		} else if(!xmlStrcmp(child->name, BAD_CAST"link")) {
			gchar *href = xml_get_attribute(child, "href");

			if(NULL != href) {
				/* 0.3 link : rel, type and href attribute,
				   only "alternate" links are used as item source */
				xmlChar *rel = xmlGetProp(child, BAD_CAST"rel");

				if(rel != NULL && !xmlStrcmp(rel, BAD_CAST"alternate"))
					item_set_source(ctxt->item, href);
				/* else
					FIXME: Maybe do something with other links? */
				xmlFree(rel);
				g_free(href);
			} else {
				/* 0.2 link : element content is the link */
				text = (gchar *)xmlNodeListGetString(ctxt->doc, child->xmlChildrenNode, 1);
				if(NULL != text) {
					item_set_source(ctxt->item, text);
					g_free(text);
				}
			}
		} else if(!xmlStrcmp(child->name, BAD_CAST"author")) {
			/* parse entry author */
			text = parseAuthor(child);
			ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "author", text);
			g_free(text);
		} else if(!xmlStrcmp(child->name, BAD_CAST"contributor")) {
			/* parse entry contributors */
			text = parseAuthor(child);
			ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "contributor", text);
			g_free(text);
		} else if(!xmlStrcmp(child->name, BAD_CAST"id")) {
			text = (gchar *)xmlNodeListGetString(ctxt->doc, child->xmlChildrenNode, 1);
			if(NULL != text) {
				item_set_id(ctxt->item, text);
				g_free(text);
			}
		} else if(!xmlStrcmp(child->name, BAD_CAST"issued")) {
			/* FIXME: is <modified> or <issued> or <created> the time tag we want to display? */
			text = (gchar *)xmlNodeListGetString(ctxt->doc, child->xmlChildrenNode, 1);
			if(NULL != text) {
				ctxt->item->time = date_parse_ISO8601 (text);
				g_free(text);
			}
		} else if(!xmlStrcmp(child->name, BAD_CAST"content")) {
			/* <content> support */
			text = pie_parse_content_construct(child);
			if(NULL != text) {
				item_set_description(ctxt->item, text);
				g_free(text);
			}
		} else if(!xmlStrcmp(child->name, BAD_CAST"summary")) {
			/* <summary> can be used for short text descriptions, it is
			   only used as long as no description has been set yet */
			if(!item_get_description(ctxt->item)) {
				text = pie_parse_content_construct(child);
				if(NULL != text) {
					item_set_description(ctxt->item, text);
					g_free(text);
				}
			}
		} else if(!xmlStrcmp(child->name, BAD_CAST"copyright")) {
			text = (gchar *)xmlNodeListGetString(ctxt->doc, child->xmlChildrenNode, 1);
			if(NULL != text) {
				ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "copyright", text);
				g_free(text);
			}
		}
	}

	/* freshly parsed items always start out unread */
	ctxt->item->readStatus = FALSE;

	return ctxt->item;
}
Beispiel #6
0
/**
 * itemset_generic_merge_check: (skip)
 * @items:		existing items
 * @newItem:		new item to merge
 * @maxChecks: 		maximum number of item checks (not evaluated
 *			by this implementation)
 * @allowUpdates:	TRUE if item content update is to be
 *      		allowed for existing items
 * @allowStateChanges:	TRUE if item state shall be
 *				overwritten by source
 *
 * Generic merge logic suitable for feeds.
 *
 * Searches @items for an item corresponding to @newItem. Two items
 * correspond if both have the same id, or if neither has an id and no
 * difference in title or description was detected. If a corresponding
 * item with differing content is found and @allowUpdates is TRUE, the
 * existing item is updated in place: description and metadata are moved
 * from @newItem (which loses them) and the item is written to the DB.
 *
 * Returns: TRUE if no corresponding existing item was found, i.e. the
 * new item has to be added; FALSE if it already exists (whether or not
 * the existing item was updated)
 */
static gboolean
itemset_generic_merge_check (GList *items, itemPtr newItem, gint maxChecks, gboolean allowUpdates, gboolean allowStateChanges)
{
	GList		*oldItemIdIter = items;
	itemPtr		oldItem = NULL;
	gboolean	found, equal = FALSE;
	/* Bitmask of detected differences, only used for debug output:
	   1 = title, 2 = description, 4 = read status, 8 = flag status,
	   16 = different ids. It is never reset, so it accumulates over
	   all old items compared before the loop ends. */
	guint		reason = 0;

	/* determine if we should add it... */
	debug3 (DEBUG_CACHE, "check new item for merging: \"%s\", %i, %i", item_get_title (newItem), allowUpdates, allowStateChanges);

	/* compare to every existing item in this feed */
	found = FALSE;
	while (oldItemIdIter) {
		oldItem = (itemPtr)(oldItemIdIter->data);

		/* try to compare the two items */

		/* trivial case: one item has id the other doesn't -> they can't be equal */
		if (((item_get_id (oldItem) == NULL) && (item_get_id (newItem) != NULL)) ||
		    ((item_get_id (oldItem) != NULL) && (item_get_id (newItem) == NULL))) {
			/* cannot be equal (different ids) so compare to
			   next old item */
			oldItemIdIter = g_list_next (oldItemIdIter);
		   	continue;
		}

		/* From here on either both items have an id or none has. */

		/* just for the case there are no ids: compare titles and HTML descriptions
		   (a field missing on either side is not counted as a difference) */
		equal = TRUE;

		if (((item_get_title (oldItem) != NULL) && (item_get_title (newItem) != NULL)) &&
		     (0 != strcmp (item_get_title (oldItem), item_get_title (newItem)))) {
	    		equal = FALSE;
			reason |= 1;
		}

		if (((item_get_description (oldItem) != NULL) && (item_get_description (newItem) != NULL)) &&
		     (0 != strcmp (item_get_description(oldItem), item_get_description (newItem)))) {
	    		equal = FALSE;
			reason |= 2;
		}

		/* best case: they both have ids (position important: id check is useless without knowing if the items are different!) */
		if (item_get_id (oldItem)) {
			if (0 == strcmp (item_get_id (oldItem), item_get_id (newItem))) {
				/* same id: this is the corresponding item, "equal" now
				   tells whether its content needs to be updated */
				found = TRUE;

				if (allowStateChanges) {
					/* found corresponding item, check if they are REALLY equal (eg, read status may have changed) */
					if(oldItem->readStatus != newItem->readStatus) {
						equal = FALSE;
						reason |= 4;
					}
					if(oldItem->flagStatus != newItem->flagStatus) {
						equal = FALSE;
						reason |= 8;
					}
				}
				break;
			} else {
				/* different ids, but the content might be still equal (e.g. empty)
				   so we need to explicitly unset the equal flag !!!  */
				equal = FALSE;
				reason |= 16;
			}
		}

		/* only reachable with equal == TRUE if both items have no id
		   and no title/description difference was detected */
		if (equal) {
			found = TRUE;
			break;
		}

		oldItemIdIter = g_list_next (oldItemIdIter);
	}

	/* If found is TRUE here, oldItem points to the corresponding item. */
	if (!found) {
		debug0 (DEBUG_CACHE, "-> item is to be added");
	} else {
		/* if the item was found but has other contents -> update contents */
		if (!equal) {
			if (allowUpdates) {
				/* no item_set_new_status() - we don't treat changed items as new items! */
				item_set_title (oldItem, item_get_title (newItem));

				/* don't use item_set_description as it does some unwanted length handling
				   and we want to enforce the new description; the string is moved,
				   not copied, so newItem gives up its description */
				g_free (oldItem->description);
				oldItem->description = newItem->description;
				newItem->description = NULL;

				oldItem->time = newItem->time;
				oldItem->updateStatus = TRUE;
				// FIXME: this does not remove metadata from DB
				/* the metadata list is moved from newItem as well */
				metadata_list_free (oldItem->metadata);
				oldItem->metadata = newItem->metadata;
				newItem->metadata = NULL;

				/* Only update item state for feed sources where it is necessary
				   which means online accounts we sync against, but not normal
				   online feeds where items have no read status. */
				if (allowStateChanges) {
					/* To avoid notification spam from external
					   sources: never set read items to unread again!
					   (only the unread -> read transition is applied) */
					if ((!oldItem->readStatus) && (newItem->readStatus))
						oldItem->readStatus = newItem->readStatus;

					oldItem->flagStatus = newItem->flagStatus;
				}

				db_item_update (oldItem);
				debug1 (DEBUG_CACHE, "-> item already existing and was updated, reason %x", reason);
			} else {
				debug0 (DEBUG_CACHE, "-> item updates not merged because of parser errors");
			}
		} else {
			debug0 (DEBUG_CACHE, "-> item already exists");
		}
	}

	return !found;
}
Beispiel #7
0
/* method to parse standard tags for each item element */
itemPtr parseCDFItem(feedParserCtxtPtr ctxt, xmlNodePtr cur, CDFChannelPtr cp) {
	gchar		*tmp = NULL, *tmp2, *tmp3;

	if(CDFToMetadataMapping == NULL) {
		CDFToMetadataMapping = g_hash_table_new(g_str_hash, g_str_equal);
		g_hash_table_insert(CDFToMetadataMapping, "author", "author");
		g_hash_table_insert(CDFToMetadataMapping, "category", "category");
	}
		
	ctxt->item = item_new();
	
	/* save the item link */
	if(!(tmp = (gchar *)xmlGetProp(cur, BAD_CAST"href")))
		tmp = (gchar *)xmlGetProp(cur, BAD_CAST"HREF");
	if(tmp) {
		item_set_source(ctxt->item, tmp);
		g_free(tmp);
	}
	
	cur = cur->xmlChildrenNode;
	while(cur) {

		if(!cur->name || cur->type != XML_ELEMENT_NODE) {
			cur = cur->next;
			continue;
		}
		
		/* save first link to a channel image */
		if(NULL != (tmp = g_ascii_strdown((gchar *)cur->name, -1))) {
			if(NULL != (tmp2 = g_hash_table_lookup(CDFToMetadataMapping, tmp))) {
				if(NULL != (tmp3 = (gchar *)xmlNodeListGetString(cur->doc, cur->xmlChildrenNode, TRUE))) {
					ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, tmp2, tmp3);
					g_free(tmp3);
				}
			}
			g_free(tmp);
		}
		
		if((!xmlStrcasecmp(cur->name, BAD_CAST"logo"))) {
			
			if(!(tmp = (gchar *)xmlGetProp(cur, BAD_CAST"href")))
				tmp = (gchar *)xmlGetProp(cur, BAD_CAST"HREF");
			if(tmp) {
				ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "imageUrl", tmp);
				g_free(tmp);
			}
			
		} else if((!xmlStrcasecmp(cur->name, BAD_CAST"title"))) {
			if(NULL != (tmp = unhtmlize((gchar *)xmlNodeListGetString(cur->doc, cur->xmlChildrenNode, 1)))) {
				item_set_title(ctxt->item, tmp);
				g_free(tmp);
			}
			
		} else if((!xmlStrcasecmp(cur->name, BAD_CAST"abstract"))) {
			if(NULL != (tmp = (gchar *)xmlNodeListGetString(cur->doc, cur->xmlChildrenNode, 1))) {
				item_set_description(ctxt->item, tmp);
				g_free(tmp);
			}
			
		} else if((!xmlStrcasecmp(cur->name, BAD_CAST"a"))) {
			if(!(tmp = (gchar *)xmlGetProp(cur, BAD_CAST"href")))
				tmp = (gchar *)xmlGetProp(cur, BAD_CAST"HREF");
			if(tmp) {
				item_set_source(ctxt->item, tmp);
				g_free(tmp);
			}
		}
		
		cur = cur->next;
	}

	ctxt->item->readStatus = FALSE;
	
	return ctxt->item;
}