Exemplo n.º 1
0
Arquivo: htmlpp.c Projeto: gxf/heigong
/**
 * Entry point of the HTML pretty-printer.
 *
 * Expects exactly one argument, the path of the HTML file. The path is passed
 * to pre_trim(), the file is parsed with libxml2's HTML parser, and the
 * resulting document is handed to trim() and then output().
 *
 * Returns 0 on success, 1 on a usage error or when the file cannot be parsed.
 */
int main(int argc, char **argv) 
{
    char *filename;
    htmlDocPtr doc;
    int parse_flags = HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING
        | HTML_PARSE_NOBLANKS | HTML_PARSE_COMPACT | HTML_PARSE_RECOVER;

    LIBXML_TEST_VERSION

    if (argc != 2) {
        /* argv[0] may be NULL when argc is 0, so fall back to a fixed name. */
        fprintf(stderr, "Usage: %s in.html\n", argc > 0 ? argv[0] : "htmlpp");
        return 1;
    }

    /* Only read argv[1] once we know it exists. */
    filename = argv[1];

    pre_trim(filename);

    doc = htmlReadFile(filename, NULL, parse_flags);
    if (doc == NULL) {
        fprintf(stderr, "Failed to parse %s\n", filename);
        xmlCleanupParser();
        return 1;
    }

    trim(doc);

    output(doc);

    xmlFreeDoc(doc);
    xmlCleanupParser();
    return 0;
}
/**
 * ExtractBoardList constructor.
 *
 * Parses the HTML file `file` as UTF-8 (in recovery mode), passes the root
 * element to FindBoardInfo() and then hands boardInfoArray to
 * SQLiteAccessor::SetBoardInfoCommit(). If the file cannot be parsed or has
 * no root element, nothing is committed.
 *
 * The parsed document is released on every path before the libxml2 parser
 * state is cleaned up.
 *
 * NOTE(review): boardInfoArray is not declared in this function, so it is
 * presumably a class member; it is reset to NULL after deletion so that it
 * does not dangle. Confirm against the class declaration.
 */
ExtractBoardList::ExtractBoardList(const char* file) {

     // Prepare the SQLiteAccessor instance and the result array
     SQLiteAccessor* accessor = new SQLiteAccessor();
     boardInfoArray = new wxArrayString();

     // Encoding used to read the file
     const char* enc = "utf-8";

     // Read the HTML document
     htmlDocPtr m_doc = htmlReadFile(file, enc, HTML_PARSE_RECOVER );

     // Obtain the root node; stays NULL when parsing failed
     htmlNodePtr root = NULL;
     if (NULL != m_doc) {
	  root = xmlDocGetRootElement(m_doc);
     }

     if (NULL != root) {
	  // Normal processing
	  FindBoardInfo(root);
     }

     // Release the document before tearing down the parser's global state
     if (NULL != m_doc) {
	  xmlFreeDoc(m_doc);
     }
     xmlCleanupParser();
     xmlCleanupCharEncodingHandlers();

     // Commit only when a document with a root element was processed
     if (NULL != root) {
	  accessor->SetBoardInfoCommit(boardInfoArray);
     }

     delete accessor;
     delete boardInfoArray;
     boardInfoArray = NULL;
}
Exemplo n.º 3
0
/**
 * Convert the downloaded data of one station into the result file.
 *
 * If "<station_id_with_path>.new" is readable it first replaces
 * station_id_with_path. The station file is then parsed with libxml2's HTML
 * parser and passed, together with the station id (the file name without
 * directory and without its last extension), to parse_and_write_xml_data().
 * When detail_path_data is readable and parses to a document whose root
 * element name does not contain "err", it is passed to
 * parse_and_write_detail_data(). If parse_and_write_xml_data() returned a
 * positive value, "</station>" is appended to result_file.
 *
 * @param station_id_with_path  path of the station data file
 * @param result_file           path of the output file
 * @param detail_path_data      path of the detail data file (may be NULL)
 * @return the value returned by parse_and_write_xml_data(); -1 when the
 *         station file is missing, unparsable, has no root element or its
 *         name has no '/' or no '.' after the last '/'; -2 when the root
 *         element name contains "err".
 */
int
convert_station_forecacom_data(const char *station_id_with_path, const char *result_file, const char *detail_path_data ){

    xmlDoc      *doc = NULL;
    xmlNode     *root_node = NULL;
    int         days_number = -1;
    char        buffer[1024],
                *dot = NULL;
    const char  *file_name = NULL;
    FILE        *file_out;

    if(!station_id_with_path)
        return -1;

    /* check for a new file; if it exists, rename it over the current one */
    snprintf(buffer, sizeof(buffer), "%s.new", station_id_with_path);
    if(!access(buffer, R_OK))
        rename(buffer, station_id_with_path);

    /* check file accessibility */
    if(access(station_id_with_path, R_OK))
        return -1;

    /* check that the file contains valid data */
    doc = htmlReadFile(station_id_with_path, "UTF-8", HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
    if(!doc)
        return -1;

    root_node = xmlDocGetRootElement(doc);
    if(!root_node){
        /* an empty document has no root element to inspect */
        xmlFreeDoc(doc);
        xmlCleanupParser();
        return -1;
    }
    if(root_node->type == XML_ELEMENT_NODE &&
            strstr((char*)root_node->name, "err")){
        xmlFreeDoc(doc);
        xmlCleanupParser();
        return -2;
    }

    /* prepare station id: file name without directory and last extension */
    file_name = strrchr(station_id_with_path, '/');
    if(!file_name){
        /* no directory separator: nothing is converted, but the document
           must still be released */
        xmlFreeDoc(doc);
        xmlCleanupParser();
        return days_number;
    }
    file_name++; /* skip '/' */
    snprintf(buffer, sizeof(buffer), "%s", file_name);
    dot = strrchr(buffer, '.');
    if(!dot){
        xmlFreeDoc(doc);
        xmlCleanupParser();
        return -1;
    }
    *dot = 0;

    days_number = parse_and_write_xml_data(buffer, doc, result_file);
    xmlFreeDoc(doc);
    xmlCleanupParser();

    /* optional detail data */
    if(detail_path_data && !access(detail_path_data, R_OK)){
        doc = htmlReadFile(detail_path_data, "UTF-8", HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
        if(doc){
            root_node = xmlDocGetRootElement(doc);
            if(root_node && !(root_node->type == XML_ELEMENT_NODE &&
                    strstr((char*)root_node->name, "err")))
                parse_and_write_detail_data(buffer, doc, result_file);
            xmlFreeDoc(doc);
            xmlCleanupParser();
        }
    }

    /* close the station element only when forecast data was written */
    if(days_number > 0){
        file_out = fopen(result_file, "a");
        if(file_out){
            fprintf(file_out,"</station>");
            fclose(file_out);
        }
    }

    return days_number;
}
Exemplo n.º 4
0
Arquivo: dict.c Projeto: No-name/dict
GPtrArray * dict_get_translate(const gchar * keyword)
{
	gchar * key_url = NULL;
	htmlDocPtr html_doc = NULL;
	xmlDocPtr doc = NULL;
	xmlNodePtr clone = NULL;
	xmlXPathContextPtr ctx = NULL;
	xmlXPathObjectPtr obj = NULL;
	xmlNodeSetPtr nodeset = NULL;
	gchar * tmp_str = NULL;
	GPtrArray * result = NULL;
	gint i;


	key_url = g_strjoin(NULL, KEY_URL_PREFIX, keyword, KEY_URL_SUFIX, NULL);
	g_debug("KEY-URL: %s", key_url);

	html_doc = htmlReadFile(key_url, NULL, HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR);
	if (!html_doc)
	{
		g_message("KEY-URL %s get failed", key_url);
		goto out;
	}

	ctx = xmlXPathNewContext((xmlDocPtr)html_doc);
	if (!ctx)
	{
		g_message("XPath context creat failed");
		goto out;
	}

	obj = xmlXPathEvalExpression(KEY_WORD_XPATH, ctx);
	if (!obj)
	{
		g_message("XPath eval key word xpath failed");
		goto out;
	}

	if (xmlXPathNodeSetIsEmpty(obj->nodesetval))
	{
		g_message("XPath search keyword failed");
		goto out;
	}

	nodeset = obj->nodesetval;
	g_debug("Key word node set have %d object", nodeset->nodeNr);

	clone = xmlCopyNode(nodeset->nodeTab[0], 1);
	if (!clone)
		goto out;

	doc = xmlNewDoc("1.0");
	if (!doc)
		goto out;

	xmlDocSetRootElement(doc, clone);

	xmlXPathFreeContext(ctx);
	ctx = NULL;

	xmlXPathFreeObject(obj);
	obj = NULL;

	ctx = xmlXPathNewContext(doc);
	if (!ctx)
		goto out;

	obj = xmlXPathEvalExpression("//span[@class='keyword']", ctx);
	if (!obj)
		goto out;

	nodeset = obj->nodesetval;
	tmp_str = xmlNodeGetContent(nodeset->nodeTab[0]->xmlChildrenNode);
	g_debug("The word to search %s", tmp_str);

	xmlFree(tmp_str);
	tmp_str = NULL;

	xmlXPathFreeObject(obj);
	obj = NULL;

	obj = xmlXPathEvalExpression("//ul/li", ctx);
	if (!obj)
		goto out;

	if (xmlXPathNodeSetIsEmpty(obj->nodesetval))
	{
		g_message("Result value is empty");
		goto out;
	}

	nodeset = obj->nodesetval;
	result = g_ptr_array_sized_new(nodeset->nodeNr + 1);
	for (i = 0; i < nodeset->nodeNr; ++i)
	{
		tmp_str = xmlNodeGetContent(nodeset->nodeTab[i]->xmlChildrenNode);
		g_ptr_array_add(result, tmp_str);
	}

	g_ptr_array_add(result, NULL);

out:
	if (doc)
		xmlFreeDoc(doc);

	if (key_url)
		g_free(key_url);

	if (html_doc)
		xmlFreeDoc((xmlDocPtr)html_doc);

	if (ctx)
		xmlXPathFreeContext(ctx);

	if (obj)
		xmlXPathFreeObject(obj);

	return result;
}
Exemplo n.º 5
0
/**
 * Write the result file for one station.
 *
 * Creates (truncates) result_file, writes the XML declaration, the opening
 * <station> element whose id is the part of station_id_with_path after the
 * last '/' (empty when the path contains no '/'), and the <units> element.
 * It then calls parse_current_weather() with detail_path_data and
 * parse_forecast_weather() with station_id_with_path, and finally appends
 * "</station>" to result_file.
 *
 * @param station_id_with_path  path of the forecast data file
 * @param result_file           path of the output file
 * @param detail_path_data      path of the current-weather data file
 * @return 0 once the header has been written; -1 when station_id_with_path or
 *         result_file is NULL or result_file cannot be opened for writing.
 */
gint
convert_station_hkogovhk_data(const gchar *station_id_with_path, const gchar *result_file, const gchar *detail_path_data ){

    gchar       buffer[1024];
    const gchar *delimiter = NULL;
    FILE        *file_out;

    /* Both paths are dereferenced below, so reject NULL before any use. */
    if(!station_id_with_path || !result_file)
        return -1;

    file_out = fopen(result_file, "w");
    if (!file_out)
        return -1;

    /* prepare station id */
    *buffer = 0;
    delimiter = strrchr(station_id_with_path, '/');
    if(delimiter){
        delimiter++; /* skip '/' */
        snprintf(buffer, sizeof(buffer) - 1, "%s", delimiter);
    }
    fprintf(file_out,"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<station name=\"Station name\" id=\"%s\" xmlns=\"http://omweather.garage.maemo.org/schemas\">\n", buffer);
    fprintf(file_out," <units>\n  <t>C</t>\n  <ws>m/s</ws>\n  <wg>m/s</wg>\n  <d>km</d>\n");
    fprintf(file_out,"  <h>%%</h>  \n  <p>mmHg</p>\n </units>\n");
    fclose(file_out);

    /* The helpers reopen result_file themselves, hence the close above. */
    parse_current_weather(detail_path_data, result_file);
    parse_forecast_weather(station_id_with_path, result_file);

    file_out = fopen(result_file, "a");
    if (file_out){
        fprintf(file_out,"</station>");
        fclose(file_out);
    }

    return 0;
}
/**
 * Download the omnibet pages, rewrite the markup and feed the parsed tree to
 * omnibet_walk_tree().
 *
 * OMNIBET_URL1 is fetched first to obtain cookies, then OMNIBET_URL2 is
 * fetched with those cookies into a per-user temporary file. Every "<strong>"
 * in that file is replaced by "<strong custom=livescore>", the result is
 * written to a second temporary file, and that file is parsed with libxml2's
 * HTML parser.
 *
 * @param feed_matches          passed through to omnibet_walk_tree()
 * @param feed_matches_counter  passed through to omnibet_walk_tree()
 * @return 1 when either download fails and also after the parse step has
 *         run; 0 when the downloaded file cannot be loaded, rewritten or
 *         saved.
 *
 * NOTE(review): the same value (1) is returned for download failure and for
 * completion; this is kept as in the original, so confirm what callers expect.
 */
int feed_main(match_data **feed_matches, int *feed_matches_counter) {
	omnibet_match_data omnibet_match;
	GSList *cookies = NULL;
	char tmp_file[1024];
	char tmp_file2[1024];
	char *orig_xml = NULL;
	char *fixed_xml2 = NULL;
	FILE *fp = NULL;
	htmlDocPtr parser = NULL;
	/* The original read pw_uid from getpwuid(getuid()), which can return
	 * NULL; getuid() yields the same id without that failure mode. */
	unsigned int uid = (unsigned int)getuid();

	memset(&omnibet_match.match_time[0], '\0', sizeof(omnibet_match.match_time));
	memset(&omnibet_match.team_home[0], '\0', sizeof(omnibet_match.team_home));
	memset(&omnibet_match.team_away[0], '\0', sizeof(omnibet_match.team_away));
	omnibet_match.score_home = 0;
	omnibet_match.score_away = 0;
	omnibet_match.stage = -1;
	omnibet_match.skip = FALSE;

	snprintf(tmp_file, sizeof(tmp_file), "%s-%u", OMNIBET_FILENAME, uid);
	snprintf(tmp_file2, sizeof(tmp_file2), "%s-%u-a", OMNIBET_FILENAME, uid);

	// Get our cookie from main page
	if (get_url(OMNIBET_URL1, OMNIBET_USER_AGENT, &tmp_file[0], NULL, &cookies))
		return 1;

	// Fetch actual page
	if (get_url(OMNIBET_URL2, OMNIBET_USER_AGENT, &tmp_file[0], cookies, NULL)) {
		if (cookies)
			soup_cookies_free(cookies);
		return 1;
	}

	if (cookies)
		soup_cookies_free(cookies);

	orig_xml = omnibet_load_file(&tmp_file[0]);
	if (!orig_xml)
		return 0;

	fixed_xml2 = omnibet_replace(orig_xml, "<strong>", "<strong custom=livescore>");
	if (!fixed_xml2) {
		free(orig_xml);
		return 0;
	}

	fp = fopen(&tmp_file2[0], "w");
	if (!fp) {
		printf("Cannot open output file!\n");
		free(orig_xml);
		free(fixed_xml2);
		return 0;
	}
	fprintf(fp, "%s\n", fixed_xml2);
	fclose(fp);

	parser = htmlReadFile(&tmp_file2[0], OMNIBET_CHARSET,
		HTML_PARSE_RECOVER |
		//HTML_PARSE_NOBLANKS | 
		HTML_PARSE_NOERROR |
		HTML_PARSE_NOWARNING |
#ifdef HAVE_MATE
		HTML_PARSE_NOIMPLIED |
#endif
		HTML_PARSE_COMPACT);

	/* Without a document there is no tree to walk. */
	if (parser) {
		omnibet_walk_tree(xmlDocGetRootElement(parser), &omnibet_match, feed_matches, feed_matches_counter);
		xmlFreeDoc(parser);
	}

	free(orig_xml);
	free(fixed_xml2);

	return 1;
}
Exemplo n.º 7
0
/**
 * Return the text of a programme sheet, using a file cache.
 *
 * The cache file name is "<epgdb_root>/<url>" with every '/', '\\', '?', '&'
 * and '=' in url replaced by '_'. If that file can be opened its content is
 * returned. Otherwise url is resolved against baseUrl, the page is loaded
 * with libxml2's HTML parser, and the children of the first <p> under the
 * <div id="box"> are passed to append_scheda() to fill the buffer; a
 * non-empty result is then written to the cache file.
 *
 * @param baseUrl     base URL used to resolve url
 * @param url         sheet URL; must not be NULL
 * @param epgdb_root  directory holding the cache files; must not be NULL
 * @return pointer to a static, NUL-terminated buffer that is overwritten by
 *         the next call; empty when nothing could be read.
 */
char * read_scheda(char *baseUrl, xmlChar *url, char *epgdb_root)
{
	static char buf[2048];
	size_t i;
	size_t nread;
	size_t root_len = strlen(epgdb_root);
	size_t url_len = strlen((char*)url);
	FILE *fd;
	char cachefile[root_len + url_len + 2];
	htmlDocPtr docScheda = NULL;
	xmlXPathContextPtr contextScheda = NULL;
	xmlXPathObjectPtr par = NULL;
	xmlChar *urlScheda = NULL;

	/* build cache filename */
	buf[0]='\0';
	memcpy(cachefile, epgdb_root, root_len);
	cachefile[root_len] = '/';
	for (i = 0; i < url_len; i++)
	{
		/* characters that are unsafe in a file name become '_' */
		if (url[i] == '/' || url[i] == '\\' || url[i] == '?' || url[i] == '&' || url[i] == '=')
			cachefile[root_len + 1 + i] = '_';
		else
			cachefile[root_len + 1 + i] = url[i];
	}
	cachefile[root_len + 1 + url_len] = '\0';

	/* try to read from cache */
	fd = fopen(cachefile, "r");
	if (fd)
	{
		/* leave room for the terminator: the file may be longer than the
		   buffer or may not contain a NUL byte at all */
		nread = fread(buf, 1, sizeof(buf) - 1, fd);
		buf[nread] = '\0';
		fclose(fd);
		return buf;
	}

	/* ok... no cache... download it! */
	urlScheda = xmlBuildURI(url, (xmlChar *)baseUrl);

	if (urlScheda != NULL )
	{
		docScheda = htmlReadFile((char *)urlScheda, NULL, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING);

		if (docScheda != NULL )
		{
			contextScheda = xmlXPathNewContext(docScheda);

			if (contextScheda != NULL)
			{
				/* take the first paragraph under the div with id="box" */
				par = xmlXPathEvalExpression((const xmlChar *)"//div[@id='box']/p[1]", contextScheda);

				if (par != NULL)
				{
					if (!xmlXPathNodeSetIsEmpty(par->nodesetval))
						append_scheda(buf, par->nodesetval->nodeTab[0]->children, 0, sizeof(buf));

					/* release the result even when it matched nothing */
					xmlXPathFreeObject(par);
				}

				xmlXPathFreeContext(contextScheda);
			}

			xmlFreeDoc(docScheda);
		}

		xmlFree(urlScheda);
	}

	/* save the cache */
	if (buf[0] != '\0')
	{
		fd = fopen(cachefile, "w");
		if (fd)
		{
			/* the terminating NUL is stored as well */
			fwrite(buf, strlen(buf)+1, 1, fd);
			fclose(fd);
		}
	}

	return buf;
}
Exemplo n.º 8
0
/**
 * Demonstration of building, saving and re-reading a document with libxml2.
 *
 * Builds a small tree under a root element named "ap", saves it to
 * "CreatedXml.xml", dumps it to standard output, then reads the file "ap"
 * with the HTML parser and dumps that document to standard output as well.
 * Both documents are released before returning.
 */
void test_html()
{
    htmlDocPtr doc;
    doc = htmlNewDoc( BAD_CAST "", BAD_CAST "" );
    if (doc == NULL)
    {
        return;
    }
    xmlNodePtr root_node = xmlNewNode(NULL,BAD_CAST"ap");
    if (root_node == NULL)
    {
        xmlFreeDoc(doc);
        return;
    }
    // Set the root node
    xmlDocSetRootElement(doc,root_node);
// cur = xmlDocGetRootElement(doc); // get the document's root node
    // Create nodes directly under the root node
    xmlNewTextChild(root_node, NULL, BAD_CAST "newNode1", BAD_CAST "newNode1 content");
    xmlNewTextChild(root_node, NULL, BAD_CAST "newNode2", BAD_CAST "newNode2 content");
    xmlNewTextChild(root_node, NULL, BAD_CAST "newNode3", BAD_CAST "newNode3 content");
    /*
    xmlChar *key;
    key = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1); // get the text of a text node; its child node must be used
    xmlFree(key);
    */

    // Create a child node attached to the root node
    xmlNewChild(root_node, NULL, BAD_CAST "node1",BAD_CAST "content of node1");
    /*
    xmlNodeSetContent(curNode, (xmlChar *) "content changed"); // set the node's text content
    // To get the content of a node:
    // xmlChar *value = xmlNodeGetContent(node);
    // the returned value should be released with xmlFree(value)

    xmlUnlinkNode(curNode); // unlink the current node from the document, so the document no longer contains it
    xmlFreeNode(curNode); // manually free the unlinked node; use this when xmlDelNode or xmlRemoveNode is not available

    xmlChar *uri;
    uri = xmlGetProp(cur, "uri"); // get the attribute value
    xmlFree(uri); // release the memory

    xmlSetProp(curNode, (xmlChar *)"attribute", (xmlChar *) "no"); // set the current node's "attribute" attribute to "no"
    */
    // Create a node, set its content and attribute, then add it to the root
    xmlNodePtr node = xmlNewNode(NULL,BAD_CAST"node2");
    xmlNodePtr content = xmlNewText(BAD_CAST"NODE CONTENT");
    xmlAddChild(root_node,node);
    xmlAddChild(node,content);
    xmlNewProp(node,BAD_CAST"attribute",BAD_CAST "yes");
    // Add an attribute to a node with xmlNewProp()
    node=xmlNewChild(root_node, NULL, BAD_CAST "node3", BAD_CAST"node has attributes");
    xmlNewProp(node, BAD_CAST "attribute", BAD_CAST "no");
    // Create a son node and a grandson node
    node = xmlNewNode(NULL, BAD_CAST "son");
    xmlAddChild(root_node,node);
    xmlNodePtr grandson = xmlNewNode(NULL, BAD_CAST "grandson");
    xmlAddChild(node,grandson);
    xmlAddChild(grandson, xmlNewText(BAD_CAST "This is a grandson node"));
    // Store the XML document
    int nRel = xmlSaveFile("CreatedXml.xml",doc);
    if (nRel != -1)
    {
       printf("%s\n",d_ConvertCharset("GBK", "utf-8", "一个xml文档被创建\n"));
    }

    // Save the file
    /*
     * xmlSaveFormatFile (docname, doc, 1); saves the document to disk: the
     * first argument is the output file name, the second is a pointer to the
     * xmlDoc structure, and the third is set to 1 so that the output is
     * indented.
     */
    xmlSaveFormatFileEnc( "-", doc, "UTF-8", 1);

    // Release the generated document before the variable is reused
    xmlFreeDoc(doc);

    // NOTE(review): -1 sets every parser option bit; confirm this is intended
    doc=htmlReadFile("ap", NULL, -1);
    if (doc != NULL)
    {
        xmlSaveFormatFileEnc( "-", doc, "UTF-8", 1);
        xmlFreeDoc(doc);
    }

}