Beispiel #1
0
/*
  Parse a character set definition supplied as XML text.

  The parser is driven with cs_enter / cs_value / cs_leave as its
  enter / value / leave handlers, and a my_cs_file_info carrying `loader`
  as the handlers' user data.

  @param loader  Loader context. It is stored in the parse state, and its
                 `error` buffer receives "at line N pos M: <message>" when
                 parsing fails and the message fits.
  @param buf     XML text to parse.
  @param len     Length of buf.

  @return FALSE if my_xml_parse() returned MY_XML_OK, TRUE otherwise.
*/
my_bool
my_parse_charset_xml(MY_CHARSET_LOADER *loader, const char *buf, size_t len)
{
  MY_XML_PARSER p;
  struct my_cs_file_info info;
  my_bool rc;

  my_charset_file_init(&info);
  my_xml_parser_create(&p);
  my_xml_set_enter_handler(&p,cs_enter);
  my_xml_set_value_handler(&p,cs_value);
  my_xml_set_leave_handler(&p,cs_leave);
  info.loader= loader;
  my_xml_set_user_data(&p, (void *) &info);
  rc= (my_xml_parse(&p,buf,len) == MY_XML_OK) ? FALSE : TRUE;
  if (rc)
  {
    /*
      Collect the error details before my_xml_parser_free() is called:
      the parser should not be queried once it has been released.
    */
    const char *errstr= my_xml_error_string(&p);
    /* Only format when the message plus 32 bytes of headroom fits. */
    if (sizeof(loader->error) > 32 + strlen(errstr))
    {
      /* We cannot use my_snprintf() here. See previous comment. */
      sprintf(loader->error, "at line %d pos %d: %s",
                my_xml_error_lineno(&p)+1,
                (int) my_xml_error_pos(&p),
                errstr);
    }
  }
  my_xml_parser_free(&p);
  my_charset_file_free(&info);
  return rc;
}
Beispiel #2
0
/*
  Parse charset XML text using cs_enter / cs_value / cs_leave as the
  parser's enter / value / leave handlers.

  buf, len       - XML text and its length
  add_collation  - callback stored in the parse state that is handed to
                   the handlers as user data

  Returns FALSE when my_xml_parse() yields MY_XML_OK, TRUE otherwise.
*/
my_bool my_parse_charset_xml(const char *buf, uint len, 
				    int (*add_collation)(CHARSET_INFO *cs))
{
  struct my_cs_file_info info;
  MY_XML_PARSER parser;
  my_bool failed;

  /* Parse state shared with the handlers. */
  info.add_collation= add_collation;

  my_xml_parser_create(&parser);
  my_xml_set_enter_handler(&parser, cs_enter);
  my_xml_set_value_handler(&parser, cs_value);
  my_xml_set_leave_handler(&parser, cs_leave);
  my_xml_set_user_data(&parser, (void*) &info);

  if (my_xml_parse(&parser, buf, len) == MY_XML_OK)
    failed= FALSE;
  else
    failed= TRUE;

  my_xml_parser_free(&parser);
  return failed;
}
Beispiel #3
0
int main(int ac, char **av)
{
  char str[1024*64]="";
  const char *fn;
  int  f;
  uint len;
  MY_XML_PARSER p;
  
  if (ac<2)
  {
    usage(av[0]);
    return 0;
  }
  
  fn=av[1]?av[1]:"test.xml";
  if ((f=open(fn,O_RDONLY))<0)
  {
    fprintf(stderr,"Err '%s'\n",fn);
    return 1;
  }
  
  len=read(f,str,sizeof(str)-1);
  str[len]='\0';
  
  my_xml_parser_create(&p);
  
  my_xml_set_enter_handler(&p,bstr);
  my_xml_set_value_handler(&p,dstr);
  my_xml_set_leave_handler(&p,estr);
  
  if (MY_XML_OK!=(f=my_xml_parse(&p,str,len)))
  {
    printf("ERROR at line %d pos %d '%s'\n",
      my_xml_error_lineno(&p)+1,
      my_xml_error_pos(&p),
      my_xml_error_string(&p));
  }
  
  my_xml_parser_free(&p);
  
  return 0;
}
Beispiel #4
0
/**
 * Parse the XML text of a feed and save every item found in it.
 *
 * Clears the file-level parse state (channel, image, items, item), runs
 * `data` through an XML_Parser wired to the aggregator_element_* handlers,
 * then walks the parsed items from index `item` down to 0. For each item it
 * resolves title, link, guid, description, author and timestamp, looks up
 * the iid of an already stored item (by guid, else by link, else by title),
 * and passes the resulting entry to aggregator_save_item().
 *
 * @param data  Raw XML text of the feed.
 * @param feed  Feed record; the keys "title", "link", "url", "fid" and
 *              "livetime" are read.
 * @return false if XML_Parse() fails (a page message is set), true otherwise.
 */
bool aggregator_parse_feed( string data, map <string, string> &feed )
{
	channel.clear();
	image.clear();
	items.clear();
	item = 0;

	XML_Parser xml_parser = my_xml_parser_create( data );

	XML_SetUserData(xml_parser, &xml_parser);
	XML_SetElementHandler(xml_parser, aggregator_element_start, aggregator_element_end);
	XML_SetCharacterDataHandler(xml_parser, aggregator_element_data);
	
	if( !XML_Parse(xml_parser, data.c_str(), (int) data.length(), 1) )
	{
		// Build the message while the parser is still alive, then release
		// the parser so the failure path does not leak it.
		string error = "The feed from "+feed["title"]+" seems to be broken, because of error " +
			XML_ErrorString( XML_GetErrorCode( xml_parser ) ) + " on line " +
			str( XML_GetCurrentLineNumber( xml_parser ) ) + ".";
		XML_ParserFree(xml_parser);
		set_page_message( error, "error");
		return false;
	}

	XML_ParserFree(xml_parser);

	for( int i = item; i>=0; i-- )
	{
		string	title, link, guid, date;

		// Pass every field value of this item through trim() and store it back.
		for( map <string, string>::iterator curr = items[i].begin(), end = items[i].end();  curr != end;  curr++ )
		{
			trim( curr->second );
			items[i][curr->first] = curr->second;
		}
		
		// Resolve the item's title. If no title is found, fall back to the
		// first 40 characters of the description.
		if (isset(items[i]["TITLE"]))
		{
			title = items[i]["TITLE"];
		}
		else if (isset(items[i]["DESCRIPTION"]))
		{
			title = items[i]["DESCRIPTION"].substr(0, 40);
		}
		else
		{
			title = "";
		}

		// Resolve the item's link, falling back to the feed's own link.
		if (isset(items[i]["LINK"])) 
		{
			link = items[i]["LINK"];
		}
		else 
		{
			link = feed["link"];
		}
		guid = isset(items[i]["GUID"]) ? items[i]["GUID"] : "";

		// Atom feeds have a CONTENT and/or SUMMARY tag instead of a DESCRIPTION tag.
		if (isset(items[i]["CONTENT:ENCODED"])) {
			items[i]["DESCRIPTION"] = items[i]["CONTENT:ENCODED"];
		}
		else if (isset(items[i]["SUMMARY"])) {
			items[i]["DESCRIPTION"] = items[i]["SUMMARY"];
		}
		else if (isset(items[i]["CONTENT"])) {
			items[i]["DESCRIPTION"] = items[i]["CONTENT"];
		}

		if (isset(items[i]["DC:CREATOR"])) {
			items[i]["AUTHOR"] = items[i]["DC:CREATOR"];
		}

		// Try to resolve and parse the item's publication date. Each later
		// check overrides the earlier ones, so UPDATED wins when present.
		// If no date is found, "now" is used instead.
		date = "now";
		if (isset(items[i]["PUBDATE"])) { date = items[i]["PUBDATE"]; }
		if (isset(items[i]["DC:DATE"])) { date = items[i]["DC:DATE"]; }
		if (isset(items[i]["DCTERMS:ISSUED"])) { date = items[i]["DCTERMS:ISSUED"]; }
		if (isset(items[i]["DCTERMS:CREATED"])) { date = items[i]["DCTERMS:CREATED"]; }
		if (isset(items[i]["DCTERMS:MODIFIED"])) { date = items[i]["DCTERMS:MODIFIED"]; }
		if (isset(items[i]["ISSUED"])) { date = items[i]["ISSUED"]; }
		if (isset(items[i]["CREATED"])) { date = items[i]["CREATED"]; }
		if (isset(items[i]["MODIFIED"])) { date = items[i]["MODIFIED"]; }
		if (isset(items[i]["PUBLISHED"])) { date = items[i]["PUBLISHED"]; }
		if (isset(items[i]["UPDATED"])) { date = items[i]["UPDATED"]; }

		string timestamp = strtotime( date );
		if( timestamp == "") timestamp = str( time() );

		map <string, string> entry;

		// Look up the iid of an already stored item so that saving updates it.
		// NOTE(review): in the Redis branches the result is released only when
		// a match is found; presumably redis_fetch_fields() releases it once
		// the rows are exhausted - confirm.
		if(DB_TYPE==1)
		{
			if( isset( guid ) )
			{
				map <string, string> row;
				REDIS_RES *result = redis_query_fields( redis_arg("SORT aggregator_item:fid:%d BY nosort", intval(feed["fid"])), "GET aggregator_item:*->", "#iid,guid");
				while( redis_fetch_fields( result, row ) ) {
					if( row["guid"] == guid) {
						redis_free(result); 
						entry["iid"] = row["iid"];
						break;
					}
				}
			}
			else if (isset(link) && link != feed["link"] && link != feed["url"])
			{
				map <string, string> row;
				REDIS_RES *result = redis_query_fields( redis_arg("SORT aggregator_item:fid:%d BY nosort", intval(feed["fid"])), "GET aggregator_item:*->", "#iid,link");
				while( redis_fetch_fields( result, row ) ) {
					// Compare the fetched row, not the still-empty entry.
					if( row["link"] == link) {
						redis_free(result); 
						entry["iid"] = row["iid"];
						break;
					}
				}
			}
			else
			{
				map <string, string> row;
				REDIS_RES *result = redis_query_fields( redis_arg("SORT aggregator_item:fid:%d BY nosort", intval(feed["fid"])), "GET aggregator_item:*->", "#iid,title");
				while( redis_fetch_fields( result, row ) ) {
					// Compare the fetched row, not the still-empty entry.
					if( row["title"] == title) {
						redis_free(result); 
						entry["iid"] = row["iid"];
						break;
					}
				}
			}
		}
		// NOTE(review): guid, link and title come from the feed and are placed
		// inside quoted SQL; confirm that db_querya() escapes its arguments.
		if(DB_TYPE==2)
		{
			if( isset( guid ) )
			{
				db_fetch(db_querya("SELECT iid FROM aggregator_item WHERE fid = %d AND guid = '%s'", feed["fid"].c_str(), guid.c_str()), entry );
			}
			else if (isset(link) && link != feed["link"] && link != feed["url"])
			{
				db_fetch(db_querya("SELECT iid FROM aggregator_item WHERE fid = %d AND link = '%s'", feed["fid"].c_str(), link.c_str()), entry );
			}
			else {
				db_fetch(db_querya("SELECT iid FROM aggregator_item WHERE fid = %d AND title = '%s'", feed["fid"].c_str(), title.c_str()), entry );
			}
		}

		entry["fid"] = feed["fid"];
		entry["livetime"] = feed["livetime"];
		entry["timestamp"] = timestamp;
		entry["title"] = title;
		entry["link"] = link;
		entry["author"] = items[i]["AUTHOR"];
		entry["description"] = items[i]["DESCRIPTION"];
		entry["guid"] = guid;

		aggregator_save_item( entry );
	}

	// Remove all items that are older than flush item timer.
	if(DB_TYPE==1)
	{
		// Nothing is removed for DB_TYPE 1.
	}
	if(DB_TYPE==2)
	{
		db_querya("DELETE FROM aggregator_item WHERE fid = %d AND livetime>0 AND timestamp < unix_timestamp()-livetime", feed["fid"].c_str() );
	}
	
	return true;
}