Exemplo n.º 1
0
/**
 * Generic tag parsing (used for RSS and Atom).
 *
 * In item context (isFeedTag == FALSE) the tags "date" and "title" are
 * handled specially: "date" is parsed as ISO 8601 into ctxt->item->time,
 * "title" becomes the item title. Every other tag (and every tag in feed
 * context) is compared against taglist[]; on the first match its text
 * content is appended to the subscription or item metadata under the key
 * given by mapToFeedMetadata[] / mapToItemMetadata[], unless the content
 * consists of whitespace only or no mapping exists for that tag.
 *
 * @param ctxt		parser context providing the item / subscription to fill
 * @param cur		the XML element to process
 * @param isFeedTag	TRUE when parsing a feed level tag, FALSE for item tags
 */
static void
parse_tag (feedParserCtxtPtr ctxt, xmlNodePtr cur, gboolean isFeedTag)
{
	int		i;
	gchar		*value;
	const gchar	*pos, *mapping;
	gboolean	isNotEmpty = FALSE;

	if (!isFeedTag) {
		/* special handling for the ISO 8601 date item tags */
		if (!xmlStrcmp (BAD_CAST "date", cur->name)) {
			value = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
			if (value) {
				/* assign directly, no detour through an int */
				ctxt->item->time = date_parse_ISO8601 (value);
				g_free (value);
			}
			return;
		}

		/* special handling for item titles */
		if (!xmlStrcmp (BAD_CAST "title", cur->name)) {
			value = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
			if (value) {
				item_set_title (ctxt->item, value);
				g_free (value);
			}
			return;
		}
	}

	/* compare with each possible tag name */
	for (i = 0; taglist[i] != NULL; i++) {
		if (xmlStrcmp ((const xmlChar *)taglist[i], cur->name))
			continue;

		value = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
		if (!value)
			return;

		/* Check if value consists of whitespace only. Each character
		   has to be decoded with g_utf8_get_char(), a raw byte is not
		   a valid argument for g_unichar_isspace(). */
		for (pos = value; *pos != '\0'; pos = g_utf8_next_char (pos)) {
			if (!g_unichar_isspace (g_utf8_get_char (pos))) {
				isNotEmpty = TRUE;
				break;
			}
		}

		if (isNotEmpty) {
			if (isFeedTag) {
				if (NULL != (mapping = mapToFeedMetadata[i]))
					ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, mapping, value);
			} else {
				if (NULL != (mapping = mapToItemMetadata[i]))
					ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, mapping, value);
			}
		}
		g_free (value);
		return;
	}
}
/**
 * Merges one remote TheOldReader feed into the local feed list.
 *
 * If a node with the given URL already exists below the source root only
 * its folder is updated. Otherwise a new feed node is created, tagged with
 * the remote feed id, stored, attached to the folder (or to the source root
 * when no folder is given) and scheduled for an update.
 */
static void
theoldreader_source_merge_feed (TheOldReaderSourcePtr source, const gchar *url, const gchar *title, const gchar *id, nodePtr folder)
{
	nodePtr	feedNode = feedlist_find_node (source->root, NODE_BY_URL, url);

	/* Already known: just refresh the folder assignment */
	if (feedNode) {
		node_source_update_folder (feedNode, folder);
		return;
	}

	debug2 (DEBUG_UPDATE, "adding %s (%s)", title, url);
	feedNode = node_new (feed_get_node_type ());
	node_set_title (feedNode, title);
	node_set_data (feedNode, feed_new ());

	node_set_subscription (feedNode, subscription_new (url, NULL, NULL));
	feedNode->subscription->type = source->root->source->type->feedSubscriptionType;

	/* Remember TheOldReader feed id, it is required for fetching items */
	feedNode->subscription->metadata = metadata_list_append (feedNode->subscription->metadata, "theoldreader-feed-id", id);

	db_subscription_update (feedNode->subscription);

	if (folder)
		node_set_parent (feedNode, folder, -1);
	else
		node_set_parent (feedNode, source->root, -1);
	feedlist_node_imported (feedNode);

	/**
	 * @todo mark the ones as read immediately after this is done
	 * the feed as retrieved by this has the read and unread
	 * status inherently.
	 */
	subscription_update (feedNode->subscription, FEED_REQ_RESET_TITLE | FEED_REQ_PRIORITY_HIGH);
	subscription_update_favicon (feedNode->subscription);
}
Exemplo n.º 3
0
/**
 * Parses the Atom <generator> element into the "feedgenerator" metadata.
 *
 * The element text is taken as generator name. A "version" attribute is
 * appended to the name and a "uri" attribute turns the result into an
 * escaped HTML link. Nothing is stored when the element text is empty.
 */
static void
atom10_parse_feed_generator (xmlNodePtr cur, feedParserCtxtPtr ctxt, itemPtr ip, struct atom10ParserState *state)
{
	gchar *ret, *version, *tmp, *uri;

	ret = unhtmlize ((gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1));
	if (ret && ret[0] != '\0') {
		version = xml_get_ns_attribute (cur, "version", NULL);
		if (version) {
			tmp = g_strdup_printf ("%s %s", ret, version);
			g_free (ret);
			g_free (version);
			ret = tmp;
		}
		uri = xml_get_ns_attribute (cur, "uri", NULL);
		if (uri) {
			tmp = g_markup_printf_escaped ("<a href=\"%s\">%s</a>", uri, ret);
			g_free (uri);
			g_free (ret);
			ret = tmp;
		}
		/* Always store "ret": it holds the final string even when
		   neither a version nor a uri attribute was present. */
		ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "feedgenerator", ret);
	}
	g_free (ret);
}
Exemplo n.º 4
0
/**
 * Stores the person construct of an Atom feed level <contributor>
 * element as "contributor" metadata of the subscription.
 */
static void
atom10_parse_feed_contributor (xmlNodePtr cur, feedParserCtxtPtr ctxt, itemPtr ip, struct atom10ParserState *state)
{
	gchar *person;

	person = atom10_parse_person_construct (cur);
	ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata,
	                                                     "contributor",
	                                                     person);
	g_free (person);
}
Exemplo n.º 5
0
/**
 * Stores the person construct of an Atom feed level <author> element
 * as "author" metadata of the subscription.
 */
static void
atom10_parse_feed_author (xmlNodePtr cur, feedParserCtxtPtr ctxt, itemPtr ip, struct atom10ParserState *state)
{
	gchar *person;

	person = atom10_parse_person_construct (cur);
	ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata,
	                                                     "author",
	                                                     person);
	g_free (person);
	/* FIXME: make item parsing use this author if not specified elsewhere */
}
Exemplo n.º 6
0
/**
 * Appends a key/value pair loaded from the DB to a metadata list, but only
 * if the metadata type is registered. Unregistered keys are reported via
 * debug output and the list is returned unchanged.
 *
 * @returns the (possibly new) head of the metadata list
 */
static GSList *
db_metadata_list_append (GSList *metadata, const char *key, const char *value)
{
	if (!metadata_is_type_registered (key)) {
		debug1 (DEBUG_DB, "Trying to load unregistered metadata type %s from DB.", key);
		return metadata;
	}

	return metadata_list_append (metadata, key, value);
}
Exemplo n.º 7
0
/**
 * Stores the person construct of an Atom entry level <contributor>
 * element as "contributor" metadata of the current item. Does nothing
 * if no person construct could be parsed.
 */
static void
atom10_parse_entry_contributor (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar *person = atom10_parse_person_construct (cur);

	if (!person)
		return;

	ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "contributor", person);
	g_free (person);
}
Exemplo n.º 8
0
/**
 * Stores the text construct of an Atom <rights> element as "copyright"
 * metadata of the subscription. Does nothing if no text was extracted.
 */
static void
atom10_parse_feed_rights (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar *copyright = atom10_parse_text_construct (cur, FALSE);

	if (!copyright)
		return;

	ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "copyright", copyright);
	g_free (copyright);
}
Exemplo n.º 9
0
/**
 * Handles the Atom feed level <updated> element: the raw text is kept as
 * "contentUpdateDate" metadata and its ISO 8601 interpretation becomes the
 * feed time. Does nothing if the element has no text content.
 */
static void
atom10_parse_feed_updated (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar *updated = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);

	if (!updated)
		return;

	ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "contentUpdateDate", updated);
	ctxt->feed->time = date_parse_ISO8601 (updated);
	g_free (updated);
}
Exemplo n.º 10
0
/**
 * Handles the Atom entry level <published> element: its ISO 8601
 * interpretation becomes the item time and the raw text is kept as
 * "pubDate" metadata. Does nothing if the element has no text content.
 */
static void
atom10_parse_entry_published (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar *published = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);

	if (!published)
		return;

	ctxt->item->time = date_parse_ISO8601 (published);
	ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "pubDate", published);
	g_free (published);
}
Exemplo n.º 11
0
/**
 * Stores the text content of the Atom <icon> element as "icon" metadata
 * of the subscription. Does nothing if the element has no text content.
 */
static void
atom10_parse_feed_icon (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar *icon_uri;

	icon_uri = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);

	if (icon_uri) {
		debug1 (DEBUG_PARSING, "icon URI found in atom feed: %s", icon_uri);
		ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata,
								     "icon", icon_uri);
		/* The string is our own copy and must be released here,
		   as all sibling tag handlers do after appending. */
		g_free (icon_uri);
	}
}
Exemplo n.º 12
0
/**
 * Parses item level tags of the iTunes namespace.
 *
 *  - "author":   text content is appended as "author" metadata
 *  - "summary":  extracted XHTML becomes the item description
 *  - "keywords": comma separated list, each entry (with leading
 *                whitespace removed) is appended as "category" metadata
 *
 * All other tags are ignored.
 */
static void
ns_itunes_parse_item_tag (feedParserCtxtPtr ctxt, xmlNodePtr cur)
{
	gchar *tmp;
	
	if (!xmlStrcmp(cur->name, BAD_CAST"author")) {
		tmp = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
		if (tmp) {
			ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "author", tmp);
			g_free (tmp);
		}
	}
	
	if (!xmlStrcmp (cur->name, BAD_CAST"summary")) {
		tmp = xhtml_extract (cur, 0, NULL);
		item_set_description (ctxt->item, tmp);
		g_free (tmp);
	}
	
	if (!xmlStrcmp(cur->name, BAD_CAST"keywords")) {
		gchar *allocated = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);
		gchar *keyword = allocated;

		/* parse comma separated list and strip leading spaces... */
		while (keyword) {
			/* terminate the current keyword in place and remember
			   where the next one starts (NULL if this is the last) */
			gchar *next = strchr (keyword, ',');
			if (next) {
				*next = 0;
				next++;
			}
			/* decode the character, a raw byte is not a valid
			   argument for g_unichar_isspace(); the terminating
			   NUL decodes to 0 which is not whitespace */
			while (g_unichar_isspace (g_utf8_get_char (keyword))) {
				keyword = g_utf8_next_char (keyword);
			}
			ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "category", keyword);
			keyword = next;
		}
		g_free (allocated);
	}
}
Exemplo n.º 13
0
/**
 * Handles the Atom entry level <updated> element. Unless the item already
 * carries "pubDate" metadata, the ISO 8601 interpretation of the text
 * becomes the item time and the raw text is kept as "contentUpdateDate"
 * metadata.
 */
static void
atom10_parse_entry_updated (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar *updated = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);

	if (!updated)
		return;

	/* a publication date that is already set must not be overwritten */
	if (!metadata_list_get (ctxt->item->metadata, "pubDate")) {
		ctxt->item->time = date_parse_ISO8601 (updated);
		ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "contentUpdateDate", updated);
	}

	g_free (updated);
}
Exemplo n.º 14
0
/**
 * Stores an Atom feed level <category> as "category" metadata of the
 * subscription. The "label" attribute is preferred, "term" is used as
 * fallback. The value is markup-escaped before it is stored.
 */
static void
atom10_parse_feed_category (xmlNodePtr cur, feedParserCtxtPtr ctxt, itemPtr ip, struct atom10ParserState *state)
{
	gchar *name, *escapedName;

	name = xml_get_ns_attribute (cur, "label", NULL);
	if (!name)
		name = xml_get_ns_attribute (cur, "term", NULL);
	if (!name)
		return;

	escapedName = g_markup_escape_text (name, -1);
	ctxt->subscription->metadata = metadata_list_append (ctxt->subscription->metadata, "category", escapedName);
	g_free (escapedName);
	xmlFree (name);
}
Exemplo n.º 15
0
/**
 * Stores an Atom entry level <category> as "category" metadata of the
 * current item. The "label" attribute is preferred, "term" is used as
 * fallback. The value is markup-escaped before it is stored.
 */
static void
atom10_parse_entry_category (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar *name, *escapedName;

	name = xml_get_ns_attribute (cur, "label", NULL);
	if (!name)
		name = xml_get_ns_attribute (cur, "term", NULL);
	if (!name)
		return;

	escapedName = g_markup_escape_text (name, -1);
	ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "category", escapedName);
	g_free (escapedName);
	xmlFree (name);
}
Exemplo n.º 16
0
/**
 * Creates a copy of a metadata list by re-appending every value of every
 * key to a new list.
 *
 * @param list	the metadata list to copy (may be NULL)
 *
 * @returns the new list (NULL if nothing was appended)
 */
GSList *
metadata_list_copy (GSList *list)
{
	GSList	*result = NULL;
	GSList	*entry, *value;

	for (entry = list; entry; entry = entry->next) {
		struct pair *p = (struct pair *)entry->data;

		/* each pair holds a list of values for one key */
		for (value = p->data; value; value = value->next)
			result = metadata_list_append (result, p->strid, value->data);
	}

	return result;
}
Exemplo n.º 17
0
/**
 * Item tag handler that only processes the "about" tag (everything else,
 * e.g. "ping", is ignored). The referenced URL is taken from the "about"
 * attribute (RSS 1.0) or, if there is none, from the element text
 * (RSS 2.0) and appended as "related" metadata of the current item.
 */
static void
parse_item_tag (feedParserCtxtPtr ctxt, xmlNodePtr cur)
{
	gchar *related;

	if (0 != xmlStrcmp (cur->name, BAD_CAST"about"))
		return;

	/* RSS 1.0 */
	related = xml_get_attribute (cur, "about");

	/* RSS 2.0 */
	if (!related)
		related = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, 1);

	if (!related)
		return;

	ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "related", related);
	g_free (related);
}
Exemplo n.º 18
0
/**
 * Merges one remote Inoreader feed into the local feed list.
 *
 * Returns without doing anything if a direct child of the source root
 * already has a subscription with the given URL. Otherwise a new feed node
 * is created, tagged with the remote feed id, stored, attached to the
 * source root and scheduled for an update.
 */
static void
inoreader_source_merge_feed (InoreaderSourcePtr source, const gchar *url, const gchar *title, const gchar *id)
{
	nodePtr	feedNode;
	GSList	*child;

	/* check if node to be merged already exists */
	for (child = source->root->children; child; child = g_slist_next (child)) {
		feedNode = (nodePtr)child->data;
		if (g_str_equal (feedNode->subscription->source, url))
			return;
	}

	debug2 (DEBUG_UPDATE, "adding %s (%s)", title, url);
	feedNode = node_new (feed_get_node_type ());
	node_set_title (feedNode, title);
	node_set_data (feedNode, feed_new ());

	node_set_subscription (feedNode, subscription_new (url, NULL, NULL));
	feedNode->subscription->type = &inoreaderSourceFeedSubscriptionType;

	/* Remember Inoreader feed id, it is required for fetching items */
	feedNode->subscription->metadata = metadata_list_append (feedNode->subscription->metadata, "inoreader-feed-id", id);
	db_subscription_update (feedNode->subscription);

	node_set_parent (feedNode, source->root, -1);
	feedlist_node_imported (feedNode);

	/**
	 * @todo mark the ones as read immediately after this is done
	 * the feed as retrieved by this has the read and unread
	 * status inherently.
	 */
	subscription_update (feedNode->subscription, FEED_REQ_RESET_TITLE | FEED_REQ_PRIORITY_HIGH);
	subscription_update_favicon (feedNode->subscription);
}
Exemplo n.º 19
0
/**
 * Parses one CDF <item> element into a newly created item.
 *
 * The item link is taken from the "href"/"HREF" attribute of the element
 * itself and may be overwritten by a child <a> element. Child elements
 * whose lower-cased name is found in CDFToMetadataMapping are appended as
 * metadata; <logo>, <title> and <abstract> fill image URL, title and
 * description.
 *
 * @returns the new item, which is also stored in ctxt->item
 */
itemPtr parseCDFItem(feedParserCtxtPtr ctxt, xmlNodePtr cur, CDFChannelPtr cp) {
	gchar		*str = NULL, *text;
	const gchar	*metaKey;

	/* set up the tag name -> metadata key mapping on first use */
	if(!CDFToMetadataMapping) {
		CDFToMetadataMapping = g_hash_table_new(g_str_hash, g_str_equal);
		g_hash_table_insert(CDFToMetadataMapping, "author", "author");
		g_hash_table_insert(CDFToMetadataMapping, "category", "category");
	}

	ctxt->item = item_new();

	/* save the item link */
	str = (gchar *)xmlGetProp(cur, BAD_CAST"href");
	if(!str)
		str = (gchar *)xmlGetProp(cur, BAD_CAST"HREF");
	if(str) {
		item_set_source(ctxt->item, str);
		g_free(str);
	}

	for(cur = cur->xmlChildrenNode; cur; cur = cur->next) {

		/* only named element nodes are of interest */
		if(!cur->name || cur->type != XML_ELEMENT_NODE)
			continue;

		/* generic metadata tags, matched by lower-cased tag name */
		str = g_ascii_strdown((gchar *)cur->name, -1);
		if(str) {
			metaKey = g_hash_table_lookup(CDFToMetadataMapping, str);
			if(metaKey) {
				text = (gchar *)xmlNodeListGetString(cur->doc, cur->xmlChildrenNode, TRUE);
				if(text) {
					ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, metaKey, text);
					g_free(text);
				}
			}
			g_free(str);
		}

		if(!xmlStrcasecmp(cur->name, BAD_CAST"logo")) {
			/* link to an image */
			str = (gchar *)xmlGetProp(cur, BAD_CAST"href");
			if(!str)
				str = (gchar *)xmlGetProp(cur, BAD_CAST"HREF");
			if(str) {
				ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "imageUrl", str);
				g_free(str);
			}

		} else if(!xmlStrcasecmp(cur->name, BAD_CAST"title")) {
			str = unhtmlize((gchar *)xmlNodeListGetString(cur->doc, cur->xmlChildrenNode, 1));
			if(str) {
				item_set_title(ctxt->item, str);
				g_free(str);
			}

		} else if(!xmlStrcasecmp(cur->name, BAD_CAST"abstract")) {
			str = (gchar *)xmlNodeListGetString(cur->doc, cur->xmlChildrenNode, 1);
			if(str) {
				item_set_description(ctxt->item, str);
				g_free(str);
			}

		} else if(!xmlStrcasecmp(cur->name, BAD_CAST"a")) {
			str = (gchar *)xmlGetProp(cur, BAD_CAST"href");
			if(!str)
				str = (gchar *)xmlGetProp(cur, BAD_CAST"HREF");
			if(str) {
				item_set_source(ctxt->item, str);
				g_free(str);
			}
		}
	}

	ctxt->item->readStatus = FALSE;

	return ctxt->item;
}
Exemplo n.º 20
0
/**
 * Parses an Atom <link> element.
 *
 * Note: this function is called for both item and feed context.
 *
 * The "href" attribute is resolved against the XML base URL of the node
 * or, if there is none, against the subscription homepage (when that looks
 * like an absolute URL). Depending on the "rel" attribute:
 *
 *  - none / "alternate": the resolved URL is returned
 *  - "replies":          stored as "commentFeedUri" item metadata (only for
 *                        links without type or with type application/atom+xml)
 *  - "enclosure":        appended as "enclosure" item metadata
 *  - "related", "via":   appended as item metadata under the relation name
 *
 * Item metadata is only touched when ctxt->item is set.
 *
 * @returns a newly allocated copy of the alternate link URL (to be freed
 *          with g_free() by the caller) or NULL for all other links
 */
static gchar *
atom10_parse_link (xmlNodePtr cur, feedParserCtxtPtr ctxt, struct atom10ParserState *state)
{
	gchar *href, *alternate = NULL;
	
	href = xml_get_ns_attribute (cur, "href", NULL);
	if (href) {
		xmlChar *baseURL = xmlNodeGetBase (cur->doc, cur);
		gchar *url, *relation, *type;
		const gchar *feedURL = subscription_get_homepage (ctxt->subscription);
		
		/* fall back to the feed homepage as base for relative links */
		if (!baseURL && feedURL && feedURL[0] != '|' && strstr (feedURL, "://"))
			baseURL = xmlStrdup (BAD_CAST (feedURL));
		url = (gchar *)common_build_url (href, (gchar *)baseURL);

		type = xml_get_ns_attribute (cur, "type", NULL);
		relation = xml_get_ns_attribute (cur, "rel", NULL);
		
		if (!xmlHasNsProp (cur, BAD_CAST"rel", NULL) || !relation || g_str_equal (relation, "alternate"))
			alternate = g_strdup (url);
		else if (g_str_equal (relation, "replies")) {
			if (!type || g_str_equal (type, "application/atom+xml")) {
				gchar *commentUri = (gchar *)common_build_url ((gchar *)url, subscription_get_homepage (ctxt->subscription));
				if (ctxt->item)
					metadata_list_set (&ctxt->item->metadata, "commentFeedUri", commentUri);
				g_free (commentUri);
			}
		} else if (g_str_equal (relation, "enclosure")) {
			if (ctxt->item) {
				gsize length = 0;
				gchar *lengthStr = xml_get_ns_attribute (cur, "length", NULL);
				if (lengthStr)
					length = atol (lengthStr);
				g_free (lengthStr);
				
				gchar *encStr = enclosure_values_to_string (url, type, length, FALSE /* not yet downloaded */);
				ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, "enclosure", encStr);
				ctxt->item->hasEnclosure = TRUE;
				g_free (encStr);
			}
		} else if (g_str_equal (relation, "related") || g_str_equal (relation, "via")) {	
			if (ctxt->item)
				ctxt->item->metadata = metadata_list_append (ctxt->item->metadata, relation, url);
		} else {
			/* g_warning ("Unhandled Atom link with unexpected relation \"%s\"\n", relation); */
		}
		xmlFree (baseURL);
		g_free (url);
		g_free(relation);
		g_free(type);
		g_free(href);
	} else {
		/* FIXME: @href is required, this document is not valid Atom */;
	}
	
	return alternate;
}
Exemplo n.º 21
0
/**
 * Parses the standard tags of a CDF <channel> element.
 *
 * Child elements are matched case-insensitively:
 *  - <logo>:     "HREF" (or "href") attribute becomes "imageUrl" metadata
 *  - <a>:        "HREF" attribute becomes the subscription homepage
 *  - <item>:     parsed with parseCDFItem() and added to ctxt->items,
 *                items without time inherit cp->time
 *  - <title>:    becomes ctxt->title
 *  - <abstract>: becomes "description" metadata
 *  - any other tag whose lower-cased name is found in channelHash is
 *    appended as metadata under the mapped key
 */
static void parseCDFChannel(feedParserCtxtPtr ctxt, xmlNodePtr cur, CDFChannelPtr cp) {
	gchar		*tmp, *tmp2, *tmp3;
	
	cur = cur->xmlChildrenNode;
	while(cur) {
		if(!cur->name || cur->type != XML_ELEMENT_NODE) {
			cur = cur->next;
			continue;
		}

		if((!xmlStrcasecmp(cur->name, BAD_CAST"logo"))) {
			/* try the lower case attribute only if the upper case
			   one is missing, never overwrite a found value */
			tmp = (gchar *)xmlGetProp(cur, BAD_CAST"HREF");
			if(!tmp)
				tmp = (gchar *)xmlGetProp(cur, BAD_CAST"href");
			if(tmp) {
				metadata_list_set (&ctxt->subscription->metadata, "imageUrl", tmp);
				g_free(tmp);
			}

		} else if((!xmlStrcasecmp(cur->name, BAD_CAST"a"))) {
			xmlChar *value = xmlGetProp(cur, BAD_CAST"HREF");
			if(value) {
				subscription_set_homepage (ctxt->subscription, (gchar *)value);
				xmlFree(value);
			}

		} else if((!xmlStrcasecmp(cur->name, BAD_CAST"item"))) {
			ctxt->item = parseCDFItem(ctxt, cur, cp);
			if(ctxt->item) {
				/* items without own time get the channel time */
				if(0 == ctxt->item->time)
					ctxt->item->time = cp->time;
				ctxt->items = g_list_append(ctxt->items, ctxt->item);
			}

		} else if(!xmlStrcasecmp(cur->name, BAD_CAST "title")) {
			tmp = (gchar *)xmlNodeListGetString(cur->doc, cur->xmlChildrenNode, TRUE);
			if(tmp) {
				tmp = unhtmlize(tmp);
				
				if(ctxt->title)
					g_free(ctxt->title);
				ctxt->title = tmp;
			}
			
		} else if (!xmlStrcasecmp(cur->name, BAD_CAST "abstract")) {
			tmp = (gchar *)xmlNodeListGetString (cur->doc, cur->xmlChildrenNode, TRUE);
			if (tmp) {
				metadata_list_set (&ctxt->subscription->metadata, "description", tmp);
				xmlFree (tmp);
			}
			
		} else {		
			/* generic metadata tags, matched by lower-cased tag name */
			tmp = g_ascii_strdown((gchar *)cur->name, -1);
			tmp2 = g_hash_table_lookup(channelHash, tmp);
			if(tmp2) {
				tmp3 = (gchar *)xmlNodeListGetString(cur->doc, cur->xmlChildrenNode, TRUE);
				if(tmp3) {
					ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, tmp2, tmp3);
					g_free(tmp3);
				}
			}
			g_free(tmp);
		}
		
		cur = cur->next;
	}
}
Exemplo n.º 22
0
/**
 * Parses a JSON document and creates one item per element of the array
 * found at the given root node.
 *
 * For each array element id, title, link, update time, read/flag state,
 * description and (optionally) author are read using the field names given
 * by the mapping. If a callback is given it is invoked for each element
 * after the item was filled and added to the result list.
 *
 * @param json		the JSON document
 * @param root		name of the node holding the items array
 * @param mapping	field name mapping to use
 * @param callback	optional per item callback (or NULL)
 *
 * @returns list of new items; NULL if the document could not be parsed,
 *          the root node is no array or the array is empty
 */
GList *
json_api_get_items (const gchar *json, const gchar *root, jsonApiMapping *mapping, jsonApiItemCallbackFunc callback)
{
	GList		*items = NULL;
	JsonParser	*parser = json_parser_new ();

	if (json_parser_load_from_data (parser, json, -1, NULL)) {
		JsonNode	*rootNode = json_get_node (json_parser_get_root (parser), root);
		GList		*elements = NULL;
		GList		*iter;

		/* Only descend when the root node really is an array, this
		   avoids GLib critical warnings on unexpected documents */
		if (rootNode && JSON_NODE_HOLDS_ARRAY (rootNode)) {
			debug1 (DEBUG_PARSING, "JSON API: found items root node \"%s\"", root);
			elements = json_array_get_elements (json_node_get_array (rootNode));
		}

		for (iter = elements; iter; iter = g_list_next (iter)) {
			JsonNode	*node = (JsonNode *)iter->data;
			itemPtr		item = item_new ();
			const gchar	*content, *author;

			/* Parse default feeds */
			item_set_id	(item, json_api_get_string (node, mapping->id));
			item_set_title	(item, json_api_get_string (node, mapping->title));
			item_set_source	(item, json_api_get_string (node, mapping->link));

			item->time       = json_api_get_int (node, mapping->updated);
			item->readStatus = json_api_get_bool (node, mapping->read);
			item->flagStatus = json_api_get_bool (node, mapping->flag);

			if (mapping->negateRead)
				item->readStatus = !item->readStatus;

			/* Handling encoded content */
			content = json_api_get_string (node, mapping->description);
			if (mapping->xhtml) {
				gchar *xhtml = xhtml_extract_from_string (content, NULL);
				item_set_description (item, xhtml);
				xmlFree (xhtml);
			} else {
				item_set_description (item, content);
			}

			/* Optional meta data */
			author = json_api_get_string (node, mapping->author);
			if (author)
				item->metadata = metadata_list_append (item->metadata, "author", author);

			items = g_list_append (items, (gpointer)item);

			/* Allow optional item callback to process stuff */
			if (callback)
				(*callback)(node, item);
		}

		g_list_free (elements);
	} else {
		debug1 (DEBUG_PARSING, "Could not parse JSON \"%s\"", json);
	}

	/* release the parser on success and on failure */
	g_object_unref (parser);

	return items;
}
Exemplo n.º 23
0
/* Method to parse standard tags for each item element.

   Creates a new item in ctxt->item and walks all children of the given
   node. Tags with a namespace for which a handler is registered (looked up
   by namespace URI first, then by prefix) are passed to the item tag parser
   of that handler. All other tags are matched against the known PIE entry
   tag names: title, link, author, contributor, id, issued, content, summary
   and copyright.

   Returns the new item, which is also stored in ctxt->item. */
itemPtr parseEntry(feedParserCtxtPtr ctxt, xmlNodePtr cur) {
	xmlChar			*xtmp;
	gchar			*tmp2, *tmp;
	NsHandler		*nsh;
	parseItemTagFunc	pf;
	
	g_assert(NULL != cur);
		
	ctxt->item = item_new();
	
	cur = cur->xmlChildrenNode;
	while(cur) {
		if(!cur->name) {
			g_warning("invalid XML: parser returns NULL value -> tag ignored!");
			cur = cur->next;
			continue;
		}
		
		
		/* check namespace of this tag */
		if(cur->ns) {
			/* nsh is assigned inside the condition: URI lookup first, prefix lookup as fallback */
			if((cur->ns->href && (nsh = (NsHandler *)g_hash_table_lookup(ns_pie_ns_uri_table, (gpointer)cur->ns->href))) ||
			   (cur->ns->prefix && (nsh = (NsHandler *)g_hash_table_lookup(pie_nstable, (gpointer)cur->ns->prefix)))) {
				
				/* a known namespace consumes the tag, even if it has no item tag parser */
				if(NULL != (pf = nsh->parseItemTag))
					(*pf)(ctxt, cur);
				cur = cur->next;
				continue;
			} else {
				/*g_print("unsupported namespace \"%s\"\n", cur->ns->prefix);*/
			}
		} /* explicitly no following else !!! */
		
		if(!xmlStrcmp(cur->name, BAD_CAST"title")) {
			if(NULL != (tmp = unhtmlize(pie_parse_content_construct(cur)))) {
				item_set_title(ctxt->item, tmp);
				g_free(tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"link")) {
			if(NULL != (tmp2 = xml_get_attribute(cur, "href"))) {
				/* 0.3 link : rel, type and href attribute */
				xtmp = xmlGetProp(cur, BAD_CAST"rel");
				if(xtmp != NULL && !xmlStrcmp(xtmp, BAD_CAST"alternate"))
					item_set_source(ctxt->item, tmp2);
				/* else
					FIXME: Maybe do something with other links? */
				xmlFree(xtmp);
				g_free(tmp2);
			} else {
				/* 0.2 link : element content is the link, or non-alternate link in 0.3 */
				if(NULL != (tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1))) {
					item_set_source(ctxt->item, tmp);
					g_free(tmp);
				}
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"author")) {
			/* parse entry author */
			tmp =  parseAuthor(cur);
			ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "author", tmp);
			g_free(tmp);
		} else if(!xmlStrcmp(cur->name, BAD_CAST"contributor")) {
			/* parse entry contributors */
			tmp = parseAuthor(cur);
			ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "contributor", tmp);
			g_free(tmp);
		} else if(!xmlStrcmp(cur->name, BAD_CAST"id")) {
			if(NULL != (tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1))) {
				item_set_id(ctxt->item, tmp);
				g_free(tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"issued")) {
			/* FIXME: is <modified> or <issued> or <created> the time tag we want to display? */
 			if(NULL != (tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1))) {
				ctxt->item->time = date_parse_ISO8601 (tmp);
				g_free(tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"content")) {
			/* <content> support */
			if(NULL != (tmp = pie_parse_content_construct(cur))) {
				item_set_description(ctxt->item, tmp);
				g_free(tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"summary")) {			
			/* <summary> can be used for short text descriptions, if there is no
			   <content> description we show the <summary> content */
			if(!item_get_description(ctxt->item)) {
				if(NULL != (tmp = pie_parse_content_construct(cur))) {
					item_set_description(ctxt->item, tmp);
					g_free(tmp);
				}
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"copyright")) {
 			if(NULL != (tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1))) {
				ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "copyright", tmp);
				g_free(tmp);
			}
		}
		cur = cur->next;
	}
	
	/* after parsing the item is marked as unread */
	ctxt->item->readStatus = FALSE;

	return ctxt->item;
}
Exemplo n.º 24
0
/**
 * Parses a PIE (Atom 0.2/0.3) feed document into the parser context.
 *
 * The given node has to be the <feed> element, otherwise a message is
 * appended to ctxt->feed->parseErrors and nothing else happens. Child tags
 * with a namespace for which a handler is registered (looked up by
 * namespace URI first, then by prefix) are passed to the channel tag parser
 * of that handler. All other tags are matched against the known PIE feed
 * tag names; <entry> elements are parsed with parseEntry() and added to
 * ctxt->items.
 *
 * @param ctxt	the parser context to fill
 * @param cur	the root element of the document
 */
static void pie_parse(feedParserCtxtPtr ctxt, xmlNodePtr cur) {
	gchar			*tmp2, *tmp = NULL, *tmp3;
	NsHandler		*nsh;
	parseChannelTagFunc	pf;
	
	if(xmlStrcmp(cur->name, BAD_CAST"feed")) {
		g_string_append(ctxt->feed->parseErrors, "<p>Could not find Atom/Echo/PIE header!</p>");
		return;
	}

	/* parse feed contents */
	for(cur = cur->xmlChildrenNode; cur; cur = cur->next) {
		if(!cur->name || cur->type != XML_ELEMENT_NODE)
			continue;
		
		/* check namespace of this tag */
		if(cur->ns) {
			if((cur->ns->href && (nsh = (NsHandler *)g_hash_table_lookup(ns_pie_ns_uri_table, (gpointer)cur->ns->href))) ||
			   (cur->ns->prefix && (nsh = (NsHandler *)g_hash_table_lookup(pie_nstable, (gpointer)cur->ns->prefix)))) {
				/* a known namespace consumes the tag, even if it has no channel tag parser */
				pf = nsh->parseChannelTag;
				if(pf)
					(*pf)(ctxt, cur);
				continue;
			} else {
				/*g_print("unsupported namespace \"%s\"\n", cur->ns->prefix);*/
			}
		} /* explicitly no following else !!! */
		
		if(!xmlStrcmp(cur->name, BAD_CAST"title")) {
			tmp = unhtmlize(pie_parse_content_construct(cur));
			if(tmp) {
				if(ctxt->title)
					g_free(ctxt->title);
				ctxt->title = tmp;
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"link")) {
			tmp = xml_get_attribute (cur, "href");
			if(tmp) {				
				/* 0.3 link : rel, type and href attribute */
				tmp2 = xml_get_attribute (cur, "rel");
				if(tmp2 && g_str_equal(tmp2, "alternate"))
					subscription_set_homepage (ctxt->subscription, tmp);
				/* else
					FIXME: Maybe do something with other links? */
				g_free(tmp2);
				g_free(tmp);
			} else {
				/* 0.2 link : element content is the link, or non-alternate link in 0.3 */
				tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1);
				if(tmp) {
					subscription_set_homepage (ctxt->subscription, tmp);
					g_free(tmp);
				}
			}
			
		} else if(!xmlStrcmp(cur->name, BAD_CAST"author")) {
			/* parse feed author */
			tmp = parseAuthor(cur);
			if(tmp) {
				ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "author", tmp);
				g_free(tmp);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"tagline")) {
			tmp = pie_parse_content_construct (cur);
			if (tmp) {
				metadata_list_set (&ctxt->subscription->metadata, "description", tmp);
				g_free (tmp);				
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"generator")) {
			tmp = unhtmlize((gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1));
			if(tmp && tmp[0] != '\0') {
				tmp2 = xml_get_attribute (cur, "version");
				if(tmp2) {
					tmp3 = g_strdup_printf("%s %s", tmp, tmp2);
					g_free(tmp);
					g_free(tmp2);
					tmp = tmp3;
				}
				tmp2 = xml_get_attribute (cur, "url");
				if(tmp2) {
					/* URL and name come from the feed and must be
					   escaped before they are embedded in markup */
					tmp3 = g_markup_printf_escaped("<a href=\"%s\">%s</a>", tmp2, tmp);
					g_free(tmp2);
					g_free(tmp);
					tmp = tmp3;
				}
				ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "feedgenerator", tmp);
			}
			g_free(tmp);
		} else if(!xmlStrcmp(cur->name, BAD_CAST"copyright")) {
			tmp = pie_parse_content_construct(cur);
			if(tmp) {
				ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "copyright", tmp);
				g_free(tmp);
			}				
			
		} else if(!xmlStrcmp(cur->name, BAD_CAST"modified")) { /* Modified was last used in IETF draft 02) */
			tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1);
			if(tmp) {
				ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "pubDate", tmp);
				ctxt->feed->time = date_parse_ISO8601 (tmp);
				g_free(tmp);
			}

		} else if(!xmlStrcmp(cur->name, BAD_CAST"updated")) { /* Updated was added in IETF draft 03 */
			tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1);
			if(tmp) {
				ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "pubDate", tmp);
				ctxt->feed->time = date_parse_ISO8601 (tmp);
				g_free(tmp);
			}

		} else if(!xmlStrcmp(cur->name, BAD_CAST"contributor")) { 
			tmp = parseAuthor(cur);
			if(tmp) {
				ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "contributor", tmp);
				g_free(tmp);
			}
			
		} else if((!xmlStrcmp(cur->name, BAD_CAST"entry"))) {
			ctxt->item = parseEntry(ctxt, cur);
			if(ctxt->item) {
				/* entries without own time get the feed time */
				if(0 == ctxt->item->time)
					ctxt->item->time = ctxt->feed->time;
				ctxt->items = g_list_append(ctxt->items, ctxt->item);
			}
		}
	}
}