/**
 * Parses the standard tags of one entry element into a freshly created item.
 *
 * A new item is created with item_new() and stored in ctxt->item. Every child
 * of the entry node is then either handed to a registered namespace handler
 * (looked up by namespace URI first, then by prefix) or matched against the
 * tag names handled below. Unknown tags are ignored.
 *
 * @param ctxt	feed parser context; ctxt->item is overwritten and ctxt->doc
 *		is used to resolve node text
 * @param cur	the entry node whose children are parsed (must not be NULL)
 *
 * @returns the new item (the same pointer that is stored in ctxt->item)
 */
itemPtr parseEntry(feedParserCtxtPtr ctxt, xmlNodePtr cur) {
	xmlChar			*xtmp;
	gchar			*tmp2, *tmp;
	NsHandler		*nsh;
	parseItemTagFunc	pf;

	g_assert(NULL != cur);

	ctxt->item = item_new();

	cur = cur->xmlChildrenNode;
	while(cur) {
		if(!cur->name) {
			g_warning("invalid XML: parser returns NULL value -> tag ignored!");
			cur = cur->next;
			continue;
		}

		/* check namespace of this tag */
		if(cur->ns) {
			if((cur->ns->href && (nsh = (NsHandler *)g_hash_table_lookup(ns_pie_ns_uri_table, (gpointer)cur->ns->href))) ||
			   (cur->ns->prefix && (nsh = (NsHandler *)g_hash_table_lookup(pie_nstable, (gpointer)cur->ns->prefix)))) {
				if(NULL != (pf = nsh->parseItemTag))
					(*pf)(ctxt, cur);
				cur = cur->next;
				continue;
			}
			/* no handler for this namespace: fall through and
			   treat the tag like a non-namespaced one */
		} /* explicitly no following else !!! */

		if(!xmlStrcmp(cur->name, BAD_CAST"title")) {
			if(NULL != (tmp = unhtmlize(pie_parse_content_construct(cur)))) {
				item_set_title(ctxt->item, tmp);
				g_free(tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"link")) {
			if(NULL != (tmp2 = xml_get_attribute(cur, "href"))) {
				/* 0.3 link : rel, type and href attribute */
				xtmp = xmlGetProp(cur, BAD_CAST"rel");
				if(xtmp != NULL && !xmlStrcmp(xtmp, BAD_CAST"alternate"))
					item_set_source(ctxt->item, tmp2);
				/* else
					FIXME: Maybe do something with other links? */
				xmlFree(xtmp);
				g_free(tmp2);
			} else {
				/* 0.2 link : element content is the link, or non-alternate link in 0.3 */
				xtmp = xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1);
				if(NULL != xtmp) {
					item_set_source(ctxt->item, (gchar *)xtmp);
					/* libxml2 strings must be released with xmlFree() */
					xmlFree(xtmp);
				}
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"author")) {
			/* parse entry author; skip when no author could be extracted */
			tmp = parseAuthor(cur);
			if(tmp) {
				ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "author", tmp);
				g_free(tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"contributor")) {
			/* parse entry contributors; same NULL handling as for authors */
			tmp = parseAuthor(cur);
			if(tmp) {
				ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "contributor", tmp);
				g_free(tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"id")) {
			xtmp = xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1);
			if(NULL != xtmp) {
				item_set_id(ctxt->item, (gchar *)xtmp);
				xmlFree(xtmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"issued")) {
			/* FIXME: is <modified> or <issued> or <created> the time tag we want to display? */
			xtmp = xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1);
			if(NULL != xtmp) {
				ctxt->item->time = date_parse_ISO8601 ((gchar *)xtmp);
				xmlFree(xtmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"content")) {
			/* <content> support */
			if(NULL != (tmp = pie_parse_content_construct(cur))) {
				item_set_description(ctxt->item, tmp);
				g_free(tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"summary")) {
			/* <summary> can be used for short text descriptions, if there is no
			   <content> description we show the <summary> content */
			if(!item_get_description(ctxt->item)) {
				if(NULL != (tmp = pie_parse_content_construct(cur))) {
					item_set_description(ctxt->item, tmp);
					g_free(tmp);
				}
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"copyright")) {
			xtmp = xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1);
			if(NULL != xtmp) {
				ctxt->item->metadata = metadata_list_append(ctxt->item->metadata, "copyright", (gchar *)xtmp);
				xmlFree(xtmp);
			}
		}

		cur = cur->next;
	}

	/* after parsing we fill the infos into the itemPtr structure */
	ctxt->item->readStatus = FALSE;

	return ctxt->item;
}
/**
 * Parses an Atom/Echo/PIE feed document into the given parser context.
 *
 * The node passed in must be the <feed> element. If it is not (or is NULL),
 * an error message is appended to ctxt->feed->parseErrors and nothing else
 * is changed. Otherwise every child element is either handed to a registered
 * namespace handler (looked up by namespace URI first, then by prefix) or
 * matched against the tag names handled below; results are stored in
 * ctxt->title, ctxt->subscription, ctxt->feed->time and ctxt->items.
 *
 * @param ctxt	feed parser context that receives the parsed data
 * @param cur	root node of the document (expected to be <feed>)
 */
static void pie_parse(feedParserCtxtPtr ctxt, xmlNodePtr cur) {
	gchar			*tmp2, *tmp = NULL, *tmp3;
	xmlChar			*xtmp;
	NsHandler		*nsh;
	parseChannelTagFunc	pf;

	if(!cur || xmlStrcmp(cur->name, BAD_CAST"feed")) {
		g_string_append(ctxt->feed->parseErrors, "<p>Could not find Atom/Echo/PIE header!</p>");
		return;
	}

	/* parse feed contents */
	cur = cur->xmlChildrenNode;
	while(cur) {
		if(!cur->name || cur->type != XML_ELEMENT_NODE) {
			cur = cur->next;
			continue;
		}

		/* check namespace of this tag */
		if(cur->ns) {
			if((cur->ns->href && (nsh = (NsHandler *)g_hash_table_lookup(ns_pie_ns_uri_table, (gpointer)cur->ns->href))) ||
			   (cur->ns->prefix && (nsh = (NsHandler *)g_hash_table_lookup(pie_nstable, (gpointer)cur->ns->prefix)))) {
				pf = nsh->parseChannelTag;
				if(pf)
					(*pf)(ctxt, cur);
				cur = cur->next;
				continue;
			}
			/* no handler for this namespace: fall through and
			   treat the tag like a non-namespaced one */
		} /* explicitly no following else !!! */

		if(!xmlStrcmp(cur->name, BAD_CAST"title")) {
			tmp = unhtmlize(pie_parse_content_construct(cur));
			if(tmp) {
				/* the context takes ownership of the new title */
				g_free(ctxt->title);
				ctxt->title = tmp;
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"link")) {
			tmp = xml_get_attribute (cur, "href");
			if(tmp) {
				/* 0.3 link : rel, type and href attribute */
				tmp2 = xml_get_attribute (cur, "rel");
				if(tmp2 && g_str_equal(tmp2, "alternate"))
					subscription_set_homepage (ctxt->subscription, tmp);
				/* else
					FIXME: Maybe do something with other links? */
				g_free(tmp2);
				g_free(tmp);
			} else {
				/* 0.2 link : element content is the link, or non-alternate link in 0.3 */
				xtmp = xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1);
				if(xtmp) {
					subscription_set_homepage (ctxt->subscription, (gchar *)xtmp);
					/* libxml2 strings must be released with xmlFree() */
					xmlFree(xtmp);
				}
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"author")) {
			/* parse feed author */
			tmp = parseAuthor(cur);
			if(tmp) {
				ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "author", tmp);
				g_free(tmp);
			}
		} else if (!xmlStrcmp (cur->name, BAD_CAST"tagline")) {
			tmp = pie_parse_content_construct (cur);
			if (tmp) {
				metadata_list_set (&ctxt->subscription->metadata, "description", tmp);
				g_free (tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"generator")) {
			/* the libxml2 string is handed to unhtmlize(); its result is
			   released with g_free() as before */
			tmp = unhtmlize((gchar *)xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1));
			if(tmp && tmp[0] != '\0') {
				/* append the version, if given */
				tmp2 = xml_get_attribute (cur, "version");
				if(tmp2) {
					tmp3 = g_strdup_printf("%s %s", tmp, tmp2);
					g_free(tmp);
					g_free(tmp2);
					tmp = tmp3;
				}
				/* wrap the name in a link, if an URL is given */
				tmp2 = xml_get_attribute (cur, "url");
				if(tmp2) {
					tmp3 = g_strdup_printf("<a href=\"%s\">%s</a>", tmp2, tmp);
					g_free(tmp2);
					g_free(tmp);
					tmp = tmp3;
				}
				ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "feedgenerator", tmp);
			}
			g_free(tmp);
		} else if(!xmlStrcmp(cur->name, BAD_CAST"copyright")) {
			tmp = pie_parse_content_construct(cur);
			if(tmp) {
				ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "copyright", tmp);
				g_free(tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"modified") ||
		          !xmlStrcmp(cur->name, BAD_CAST"updated")) {
			/* Modified was last used in IETF draft 02,
			   Updated was added in IETF draft 03;
			   both are handled identically */
			xtmp = xmlNodeListGetString(ctxt->doc, cur->xmlChildrenNode, 1);
			if(xtmp) {
				ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "pubDate", (gchar *)xtmp);
				ctxt->feed->time = date_parse_ISO8601 ((gchar *)xtmp);
				xmlFree(xtmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"contributor")) {
			tmp = parseAuthor(cur);
			if(tmp) {
				ctxt->subscription->metadata = metadata_list_append(ctxt->subscription->metadata, "contributor", tmp);
				g_free(tmp);
			}
		} else if(!xmlStrcmp(cur->name, BAD_CAST"entry")) {
			ctxt->item = parseEntry(ctxt, cur);
			if(ctxt->item) {
				/* items without a date of their own inherit the feed date */
				if(0 == ctxt->item->time)
					ctxt->item->time = ctxt->feed->time;
				ctxt->items = g_list_append(ctxt->items, ctxt->item);
			}
		}

		cur = cur->next;
	}
}
/*
 * Walks the children of a title-info node and stores what it finds in fb.
 *
 * Handled children:
 *   p           -> parseP()
 *   empty-line  -> a single newline is appended to the buffer
 *   genre       -> text is stored in the first free slot of fb->genres
 *   book-title  -> text is copied into fb->name
 *   author      -> parseAuthor()
 *   annotation  -> parseAnnotation()
 *   coverpage   -> href of the first image child is copied into fb->cover_href
 * All other children are ignored.
 *
 * doc: document the node belongs to (passed on to the helper parsers)
 * cur: the title-info node whose children are examined
 * fb:  content structure that receives the parsed data
 */
static void parseTitleInfo (xmlDocPtr doc, xmlNodePtr cur, FB2Content *fb) {
	xmlChar *content;

	for (cur = cur->children; cur != NULL; cur = cur->next) {
		if (!xmlStrcmp(cur->name, (const xmlChar *)"p")) {
			parseP(doc, cur, 1, fb);
		} else if (!xmlStrcmp(cur->name, (const xmlChar *)"empty-line")) {
			bufferAppend("\n", 1, fb);
		} else if (!xmlStrcmp(cur->name, (const xmlChar *)"genre")) {
			content = xmlNodeGetContent(cur->children);
			if (content) {
				/* add genre: on success fb->genres takes ownership of the string */
				int i;
				int stored = 0;

				for (i = 0; i < BUF_SIZE - 1; i++) {
					if (fb->genres[i] == NULL) {
						fb->genres[i] = content;
						fb->genres[i + 1] = NULL;
						fb->num_genres++;
						stored = 1;
						break;
					}
				}
				/* no free slot left: release the string instead of leaking it */
				if (!stored)
					xmlFree(content);
			}
		} else if (!xmlStrcmp(cur->name, (const xmlChar *)"book-title")) {
			content = xmlNodeGetContent(cur->children);
			if (content) {
				/* strncpy() does not terminate on truncation, so copy one
				   byte less and terminate explicitly */
				strncpy((char *)fb->name, (const char *)content, BUF_SIZE - 1);
				fb->name[BUF_SIZE - 1] = '\0';
				xmlFree(content);
			}
		} else if (!xmlStrcmp(cur->name, (const xmlChar *)"author")) {
			parseAuthor(doc, cur, fb);
		} else if (!xmlStrcmp(cur->name, (const xmlChar *)"annotation")) {
			parseAnnotation(doc, cur, fb);
		} else if (!xmlStrcmp(cur->name, (const xmlChar *)"coverpage")) {
			xmlNodePtr c;

			/* only the first image child is considered */
			for (c = cur->children; c != NULL; c = c->next) {
				if (!xmlStrcmp(c->name, (const xmlChar *)"image")) {
					xmlChar *cover_href = xmlGetProp(c, (const xmlChar *)"href");

					/* an image without href yields NULL; leave cover_href untouched */
					if (cover_href) {
						strncpy((char *)fb->cover_href, (const char *)cover_href, BUF_SIZE - 1);
						fb->cover_href[BUF_SIZE - 1] = '\0';
						xmlFree(cover_href);
					}
					break;
				}
			}
		}
	}
}