/*
 * Parses the root <feed> element of an Atom 0.2/0.3 (Echo/PIE) document and
 * stores what it finds in the parser context.
 *
 * ctxt  parser context; this function writes ctxt->title, the homepage and
 *       metadata of ctxt->subscription, ctxt->feed->time, ctxt->item and
 *       appends parsed entries to ctxt->items
 * cur   the document's root element
 *
 * Nothing is returned. If the root element is not named "feed", an error
 * message is appended to ctxt->feed->parseErrors and nothing else is parsed.
 */
static void pie_parse(feedParserCtxtPtr ctxt, xmlNodePtr cur)
{
	xmlNodePtr node;
	NsHandler *handler;
	parseChannelTagFunc channelFunc;
	gchar *text = NULL, *attr, *joined;

	if (xmlStrcmp(cur->name, BAD_CAST"feed")) {
		g_string_append(ctxt->feed->parseErrors, "<p>Could not find Atom/Echo/PIE header!</p>");
		return;
	}

	/* walk over the direct children of <feed> */
	for (node = cur->xmlChildrenNode; node; node = node->next) {
		if (!node->name || node->type != XML_ELEMENT_NODE)
			continue;

		/* Namespaced tags: look the handler up by namespace URI first and
		   fall back to the prefix. A tag with a known namespace is handed to
		   its handler (if it has a channel callback) and never reaches the
		   plain tag handling below. */
		if (node->ns) {
			handler = NULL;
			if (node->ns->href)
				handler = (NsHandler *)g_hash_table_lookup(ns_pie_ns_uri_table, (gpointer)node->ns->href);
			if (!handler && node->ns->prefix)
				handler = (NsHandler *)g_hash_table_lookup(pie_nstable, (gpointer)node->ns->prefix);

			if (handler) {
				channelFunc = handler->parseChannelTag;
				if (channelFunc)
					channelFunc(ctxt, node);
				continue;
			}
			/* unsupported namespace: fall through and treat it like a plain tag */
		}

		if (!xmlStrcmp(node->name, BAD_CAST"title")) {
			text = unhtmlize(pie_parse_content_construct(node));
			if (text) {
				g_free(ctxt->title);
				ctxt->title = text;
			}
		} else if (!xmlStrcmp(node->name, BAD_CAST"link")) {
			text = xml_get_attribute (node, "href");
			if (!text) {
				/* 0.2 link : element content is the link, or non-alternate link in 0.3 */
				text = (gchar *)xmlNodeListGetString(ctxt->doc, node->xmlChildrenNode, 1);
				if (text)
					subscription_set_homepage (ctxt->subscription, text);
			} else {
				/* 0.3 link : rel, type and href attribute */
				attr = xml_get_attribute (node, "rel");
				if (attr && g_str_equal(attr, "alternate"))
					subscription_set_homepage (ctxt->subscription, text);
				/* FIXME: Maybe do something with other links? */
				g_free(attr);
			}
			g_free(text);
		} else if (!xmlStrcmp(node->name, BAD_CAST"author")) {
			/* feed author */
			text = parseAuthor(node);
			if (text) {
				ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "author", text);
				g_free(text);
			}
		} else if (!xmlStrcmp(node->name, BAD_CAST"tagline")) {
			text = pie_parse_content_construct (node);
			if (text) {
				metadata_list_set (&ctxt->subscription->metadata, "description", text);
				g_free (text);
			}
		} else if (!xmlStrcmp(node->name, BAD_CAST"generator")) {
			text = unhtmlize((gchar *)xmlNodeListGetString(ctxt->doc, node->xmlChildrenNode, 1));
			if (text && text[0] != '\0') {
				/* append the version, then wrap everything in a link if a URL is given */
				attr = xml_get_attribute (node, "version");
				if (attr) {
					joined = g_strdup_printf("%s %s", text, attr);
					g_free(text);
					g_free(attr);
					text = joined;
				}
				attr = xml_get_attribute (node, "url");
				if (attr) {
					joined = g_strdup_printf("<a href=\"%s\">%s</a>", attr, text);
					g_free(attr);
					g_free(text);
					text = joined;
				}
				ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "feedgenerator", text);
			}
			g_free(text);
		} else if (!xmlStrcmp(node->name, BAD_CAST"copyright")) {
			text = pie_parse_content_construct(node);
			if (text) {
				ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "copyright", text);
				g_free(text);
			}
		} else if (!xmlStrcmp(node->name, BAD_CAST"modified") ||
		           !xmlStrcmp(node->name, BAD_CAST"updated")) {
			/* "modified" was last used in IETF draft 02,
			   "updated" was added in IETF draft 03; both are handled alike */
			text = (gchar *)xmlNodeListGetString(ctxt->doc, node->xmlChildrenNode, 1);
			if (text) {
				ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "pubDate", text);
				ctxt->feed->time = date_parse_ISO8601 (text);
				g_free(text);
			}
		} else if (!xmlStrcmp(node->name, BAD_CAST"contributor")) {
			text = parseAuthor(node);
			if (text) {
				ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "contributor", text);
				g_free(text);
			}
		} else if (!xmlStrcmp(node->name, BAD_CAST"entry")) {
			ctxt->item = parseEntry(ctxt, node);
			if (ctxt->item) {
				/* entries without a date of their own inherit the feed's date */
				if (0 == ctxt->item->time)
					ctxt->item->time = ctxt->feed->time;
				ctxt->items = g_list_append(ctxt->items, ctxt->item);
			}
		}
	}
}
/* The following feed-level tags are not used, but had been recognized:
   "language",       <---- Not in atom 0.2 or 0.3. We should use xml:lang
   "lastBuildDate",  <---- Where is this from?
   "issued",         <---- Not in the specs for feeds
   "created",        <---- Not in the specs for feeds
*/

/*
 * Extracts the content of an Atom 0.2/0.3 content construct (the kind of
 * element used for <title>, <tagline>, <content>, <summary>, ...).
 *
 * The "mode" attribute (older Atom versions, including 0.3) takes precedence;
 * if it is absent the "type" attribute decides how the content is extracted.
 *
 * cur      the element to extract from; must not be NULL
 *
 * Returns a newly allocated string that the caller releases with g_free(),
 * or NULL if the mode/type is unknown or unsupported or extraction failed.
 */
gchar* pie_parse_content_construct(xmlNodePtr cur)
{
	gchar *mode, *type, *ret = NULL;

	g_assert(NULL != cur);

	/* determine encoding mode */
	mode = xml_get_attribute (cur, "mode");
	type = xml_get_attribute (cur, "type");

	/* Modes are used in older versions of ATOM, including 0.3. They do
	   not exist in the newer IETF drafts. */
	if(NULL != mode) {
		if(!strcmp(mode, "escaped")) {
			ret = xhtml_extract (cur, 0, NULL);
		} else if(!strcmp(mode, "xml")) {
			ret = xhtml_extract (cur, 1, NULL);
		} else if(!strcmp(mode, "base64")) {
			g_warning("Base64 encoded <content> in Atom feeds not supported!\n");
		} else if(!strcmp(mode, "multipart/alternative")) {
			/* only the first child node is considered */
			if(NULL != cur->xmlChildrenNode)
				ret = pie_parse_content_construct(cur->xmlChildrenNode);
		}
		g_free(mode);
	} else {
		/* Some feeds don't specify a mode but a MIME type in the type
		   attribute. It is not clear which MIME types are necessary. */

		if(NULL == type || !g_ascii_strcasecmp(type, "TEXT") || !strcmp(type, "text/plain")) {
			/* Plain text: must be escaped and must not contain sub-tags,
			   so it is wrapped in <pre> tags. The Atom 0.3 spec says the
			   default type MUST be considered to be text/plain (the type
			   attribute is required in 0.2). */
			gchar *tmp;

			tmp = (gchar *)xmlNodeListGetString(cur->doc, cur->xmlChildrenNode, 1);
			/* xmlNodeListGetString() returns NULL for an empty element;
			   passing NULL to a %s conversion is undefined behaviour, so
			   substitute an empty string. */
			ret = g_markup_printf_escaped("<div xmlns=\"http://www.w3.org/1999/xhtml\"><pre>%s</pre></div>", tmp ? tmp : "");
			g_free(tmp);
		} else if(!g_ascii_strcasecmp(type, "HTML") || !strcmp(type, "text/html")) {
			/* the following types may contain sub-tags */
			ret = xhtml_extract (cur, 0, "http://default.base.com/");
		} else if(!g_ascii_strcasecmp(type, "xhtml") || !strcmp(type, "application/xhtml+xml")) {
			ret = xhtml_extract (cur, 1, "http://default.base.com/");
		}
		/* any other type is not supported: ret stays NULL */
	}

	g_free(type);

	return ret;
}
/*
 * Parses the standard tags of one Atom 0.2/0.3 <entry> element into a newly
 * created item.
 *
 * ctxt     parser context; ctxt->item is overwritten with the new item and
 *          ctxt->doc is used to resolve text content
 * cur      the <entry> element; must not be NULL
 *
 * Returns the new item (the same pointer that is stored in ctxt->item).
 */
itemPtr parseEntry(feedParserCtxtPtr ctxt, xmlNodePtr cur)
{
	xmlChar *xtmp;
	gchar *tmp2, *tmp;
	NsHandler *nsh;
	parseItemTagFunc pf;

	g_assert(NULL != cur);

	ctxt->item = item_new();

	cur = cur->xmlChildrenNode;
	while(cur) {
		if(!cur->name) {
			g_warning("invalid XML: parser returns NULL value -> tag ignored!");
			cur = cur->next;
			continue;
		}

		/* Check the namespace of this tag: look the handler up by namespace
		   URI first and fall back to the prefix. Tags of a known namespace
		   are handed to the handler (if it has an item callback) and are
		   not processed any further. */
		if(cur->ns) {
			if((cur->ns->href && (nsh = (NsHandler *)g_hash_table_lookup(ns_pie_ns_uri_table, (gpointer)cur->ns->href))) ||
			   (cur->ns->prefix && (nsh = (NsHandler *)g_hash_table_lookup(pie_nstable, (gpointer)cur->ns->prefix)))) {
				if(NULL != (pf = nsh->parseItemTag))
					(*pf)(ctxt, cur);
				cur = cur->next;
				continue;
			}
			/* unsupported namespace: fall through and treat it like a plain tag */
		}

		if(!xmlStrcmp(cur->name, BAD_CAST"title")) {
			if(NULL != (tmp = unhtmlize(pie_parse_content_construct(cur)))) {
				item_set_title(ctxt->item, tmp);
				g_free(tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"link")) {
			if(NULL != (tmp2 = xml_get_attribute(cur, "href"))) {
				/* 0.3 link : rel, type and href attribute */
				xtmp = xmlGetProp(cur, BAD_CAST"rel");
				if(xtmp != NULL && !xmlStrcmp(xtmp, BAD_CAST"alternate"))
					item_set_source(ctxt->item, tmp2);
				/* else FIXME: Maybe do something with other links? */
				xmlFree(xtmp);
				g_free(tmp2);
			} else {
				/* 0.2 link : element content is the link, or non-alternate link in 0.3 */
				if(NULL != (tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1))) {
					item_set_source(ctxt->item, tmp);
					g_free(tmp);
				}
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"author")) {
			/* Entry author. Only store a result that is actually there;
			   the feed-level parser guards parseAuthor() the same way. */
			if(NULL != (tmp = parseAuthor(cur))) {
				ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "author", tmp);
				g_free(tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"contributor")) {
			/* Entry contributors, guarded like the author above. */
			if(NULL != (tmp = parseAuthor(cur))) {
				ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "contributor", tmp);
				g_free(tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"id")) {
			if(NULL != (tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1))) {
				item_set_id(ctxt->item, tmp);
				g_free(tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"issued")) {
			/* FIXME: is <modified> or <issued> or <created> the time tag we want to display? */
			if(NULL != (tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1))) {
				ctxt->item->time = date_parse_ISO8601 (tmp);
				g_free(tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"content")) {
			/* <content> always replaces a description set earlier */
			if(NULL != (tmp = pie_parse_content_construct(cur))) {
				item_set_description(ctxt->item, tmp);
				g_free(tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"summary")) {
			/* <summary> can be used for short text descriptions; it is only
			   shown if no description has been set so far */
			if(!item_get_description(ctxt->item)) {
				if(NULL != (tmp = pie_parse_content_construct(cur))) {
					item_set_description(ctxt->item, tmp);
					g_free(tmp);
				}
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"copyright")) {
			if(NULL != (tmp = (gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1))) {
				ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "copyright", tmp);
				g_free(tmp);
			}
		}

		cur = cur->next;
	}

	/* after parsing, mark the new item as unread */
	ctxt->item->readStatus = FALSE;

	return ctxt->item;
}