コード例 #1
0
ファイル: samiparse.c プロジェクト: kuailexs/symbiandump-mw1
EXPORT_C
#endif


/*
 * Tears down the SAMI parsing context stored in state->user_data.
 *
 * Finishes the push parser, frees the parser context and any document
 * attached to it, releases the four GString buffers held by the context and
 * finally the context itself.  state->user_data is reset to NULL afterwards.
 * Does nothing when no context is attached.
 */
void
sami_context_deinit (ParserState * state)
{
  GstSamiContext *sami = (GstSamiContext *) state->user_data;
  htmlParserCtxtPtr parser;
  htmlDocPtr tree;

  /* No context attached: nothing to release. */
  if (sami == NULL)
    return;

  parser = sami->htmlctxt;

  /* Feed an empty terminating chunk, then grab the document pointer while
   * the parser context is still alive. */
  htmlParseChunk (parser, "", 0, 1);
  tree = parser->myDoc;
  htmlFreeParserCtxt (parser);
  sami->htmlctxt = NULL;
  if (tree != NULL)
    xmlFreeDoc (tree);

  /* Release the string buffers together with their character data. */
  g_string_free (sami->buf, TRUE);
  g_string_free (sami->rubybuf, TRUE);
  g_string_free (sami->resultbuf, TRUE);
  g_string_free (sami->state, TRUE);

  g_free (sami);
  state->user_data = NULL;
}
コード例 #2
0
ファイル: utils.c プロジェクト: UIKit0/libgrss
/*
 * Runs `string` through libxml2's push-mode HTML parser using a SAX handler
 * in which only the `characters` callback is set (unhtmlizeHandleCharacters).
 * `buffer` is handed to the parser as the callback user data.
 */
static void
_unhtmlize (gchar *string, ResultBuffer *buffer)
{
	htmlSAXHandlerPtr handler = g_new0 (htmlSAXHandler, 1);
	htmlParserCtxtPtr parser;

	handler->characters = unhtmlizeHandleCharacters;

	/* The complete input is supplied as the initial chunk at creation time... */
	parser = htmlCreatePushParserCtxt (handler, buffer,
	                                   string, strlen (string),
	                                   "", XML_CHAR_ENCODING_UTF8);

	/* ...so only a zero-length terminating chunk remains to be pushed. */
	htmlParseChunk (parser, string, 0, 1);

	htmlFreeParserCtxt (parser);
	g_free (handler);
}
コード例 #3
0
ファイル: htmltitle.cpp プロジェクト: 08142008/curl
static void parseHtml(const std::string &html,
                      std::string &title)
{
  htmlParserCtxtPtr ctxt;
  Context context;

  ctxt = htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "",
                                  XML_CHAR_ENCODING_NONE);

  htmlParseChunk(ctxt, html.c_str(), html.size(), 0);
  htmlParseChunk(ctxt, "", 0, 1);

  htmlFreeParserCtxt(ctxt);

  title = context.title;
}
コード例 #4
0
ファイル: parser.hpp プロジェクト: kod3r/wookie
    void parse(const std::string &page) {
        htmlParserCtxtPtr ctxt;

        htmlSAXHandler handler;
        memset(&handler, 0, sizeof(handler));

        handler.startElement = static_parser_start_element;
        handler.endElement = static_parser_end_element;
        handler.characters = static_parser_characters;

        ctxt = htmlCreatePushParserCtxt(&handler, this, "", 0, "", XML_CHAR_ENCODING_NONE);

        htmlParseChunk(ctxt, page.c_str(), page.size(), 0);
        htmlParseChunk(ctxt, "", 0, 1);

        htmlFreeParserCtxt(ctxt);
    }
コード例 #5
0
ファイル: XMLDocument.cpp プロジェクト: ScilabOrg/scilab
/**
 * Parses an in-memory HTML string and returns the resulting document.
 *
 * @param htmlCode the HTML text to parse
 * @param encoding forwarded unchanged to htmlCtxtReadDoc
 * @param error receives errorBuffer when no document is produced or when the
 *              parser context is not flagged valid after parsing
 * @return the document returned by htmlCtxtReadDoc (possibly null, and still
 *         returned even when the context is not flagged valid), or 0 when
 *         initHTMLContext yields no parser context
 */
xmlDoc *XMLDocument::readHTMLDocument(const std::string & htmlCode, const char * encoding, std::string * error)
{
    htmlParserCtxt * const parser = initHTMLContext(error);
    if (parser == 0)
    {
        // No context: just reinstall the silent error handler and give up.
        xmlSetGenericErrorFunc(0, errorFunctionWithoutOutput);
        return 0;
    }

    const int flags = HTML_PARSE_NOWARNING | HTML_PARSE_NOBLANKS | HTML_PARSE_COMPACT;
    htmlDocPtr parsed = htmlCtxtReadDoc(parser, (const xmlChar *)htmlCode.c_str(), 0, encoding, flags);

    const bool failed = (parsed == 0) || !parser->valid;
    if (failed)
    {
        *error = errorBuffer;
    }

    // Same cleanup on success and failure: silent error handler, free context.
    xmlSetGenericErrorFunc(0, errorFunctionWithoutOutput);
    htmlFreeParserCtxt(parser);

    return (xmlDoc *)parsed;
}
コード例 #6
0
ファイル: html_read.c プロジェクト: GNOME/gnumeric
/*
 * Reads an HTML document from `input`, parses it with libxml2's push parser
 * and passes every top-level node of the resulting document to
 * html_search_for_tables().
 *
 * fo:         unused.
 * io_context: receives an error ("Unable to parse the html.") when no
 *             document could be produced.
 * wb_view:    forwarded to html_search_for_tables() and stored in the table
 *             context.
 * input:      data source; must not be NULL.
 *
 * The first four bytes are read separately so that the character encoding
 * can be detected and a leading byte order mark skipped.  Inputs shorter
 * than four bytes are not parsed and end up in the error path.
 */
void
html_file_open (G_GNUC_UNUSED GOFileOpener const *fo, GOIOContext *io_context,
		WorkbookView *wb_view, GsfInput *input)
{
	guint8 const *buf;
	gsf_off_t size;
	int len, bomlen;
	htmlParserCtxtPtr ctxt;
	htmlDocPtr doc = NULL;
	xmlCharEncoding enc;
	GnmHtmlTableCtxt tc;

	g_return_if_fail (input != NULL);

	/* Rewind; a non-zero result means the seek failed. */
	if (gsf_input_seek (input, 0, G_SEEK_SET))
		return;

	size = gsf_input_size (input);
	if (size >= 4) {
		/* `size` now counts the bytes that follow the 4-byte probe. */
		size -= 4;
		buf = gsf_input_read (input, 4, NULL);
		if (buf != NULL) {
			enc = xmlDetectCharEncoding(buf, 4);
			switch (enc) {	/* Skip byte order mark */
			case XML_CHAR_ENCODING_UCS4BE:
			case XML_CHAR_ENCODING_UCS4LE:
			case XML_CHAR_ENCODING_UCS4_2143:
			case XML_CHAR_ENCODING_UCS4_3412:
			case XML_CHAR_ENCODING_EBCDIC:
				bomlen = 4;
				break;
			case XML_CHAR_ENCODING_UTF16BE:
			case XML_CHAR_ENCODING_UTF16LE:
				bomlen = 2;
				break;
			case XML_CHAR_ENCODING_UTF8:
				if (buf[0] == 0xef)
					bomlen = 3;
				/* NOTE(review): a leading '<' (0x3c) drops all four
				   probe bytes, i.e. real markup rather than a BOM;
				   kept as is, confirm this is intended. */
				else if (buf[0] == 0x3c)
					bomlen = 4;
				else
					bomlen = 0;
				break;
			case XML_CHAR_ENCODING_NONE:
				bomlen = 0;
				/* Try to detect unmarked UTF16LE
				   (Firefox Windows clipboard, drag data all platforms) */
				if ((buf[0] >= 0x20 || g_ascii_isspace(buf[0])) &&
				    buf[1] == 0 &&
				    (buf[2] >= 0x20 || g_ascii_isspace(buf[2])) &&
				    buf[3] == 0)
					enc =  XML_CHAR_ENCODING_UTF16LE;
				break;
			default:
				bomlen = 0;
			}
			/* Seed the parser with whatever is left of the probe
			   bytes after the byte order mark. */
			ctxt = htmlCreatePushParserCtxt (
				NULL, NULL, (char const *)(buf + bomlen),
				4 - bomlen, gsf_input_name (input), enc);

			/* Context creation can fail; without this guard the
			   ctxt->myDoc access below would dereference NULL.
			   On failure `doc` stays NULL and the error is
			   reported at the end of the function. */
			if (ctxt != NULL) {
				/* Push the remainder in chunks of at most 4096 bytes. */
				for (; size > 0 ; size -= len) {
					len = MIN (4096, size);
					buf = gsf_input_read (input, len, NULL);
					if (buf == NULL)
						break;
					htmlParseChunk (
						ctxt, (char const *)buf, len, 0);
				}

				/* Zero-length terminating chunk. */
				htmlParseChunk (ctxt, (char const *)buf, 0, 1);
				/* Take the document before the context goes away;
				   it is freed separately below. */
				doc = ctxt->myDoc;
				htmlFreeParserCtxt (ctxt);
			}
		}
	}

	if (doc != NULL) {
		xmlNodePtr ptr;
		tc.sheet = NULL;
		tc.row   = -1;
		tc.wb_view = wb_view;
		for (ptr = doc->children; ptr != NULL ; ptr = ptr->next)
			html_search_for_tables (ptr, doc, wb_view, &tc);
		xmlFreeDoc (doc);
	} else
		go_io_error_info_set (io_context,
			go_error_info_new_str (_("Unable to parse the html.")));
}
コード例 #7
0
ファイル: HttpPullData.cpp プロジェクト: ehelms/sunray
	void HttpPullData::parseHtml(const std::string &html,int pos,PARSE_ACTION action){
	  htmlParserCtxtPtr ctxt;
	  Context context;
	  ctxt = htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "",XML_CHAR_ENCODING_NONE);
	  htmlParseChunk(ctxt,html.c_str()+pos,max((unsigned long)0,html.size()-pos), 0);
	  htmlParseChunk(ctxt, "", 0, 1);
	  htmlFreeParserCtxt(ctxt);
	  switch(action){
	  	case GET_OPTIONS:
	  		options=context.options;
	  		break;
	 	case GET_OZONE:
	 		ozone=-1;
	  		for(unsigned int i=0;i<context.tableText.size();i++){
	  			if(context.tableText[i].find(" DU",0)>=0){
	  				ozone=atof(StringTools::stripString(context.tableText[i]," DU").c_str());
	  				break;
	  			}
	  		}	 	
	  		break;
	  	case GET_LOCATION:
	  		for(int i=0;i<context.boldText.size();i++){
	  			if(context.boldText[i]=="Ecoregion "){
	  				info.ecoregion=context.boldText[i+1];
	  			} else if(context.boldText[i]=="Latitude"){
	  	  			info.latitude=atof(StringTools::stripString(context.boldText[i+1]," N").c_str());
	  			} else if(context.boldText[i]=="Longitude"){
	  				if((int)context.boldText[i+1].find(" W")>=0){
	  					info.longitude=atof(StringTools::stripString(context.boldText[i+1]," W").c_str());
	  				} else if((int)context.boldText[i+1].find(" E")>=0){
	  					info.longitude=-atof(StringTools::stripString(context.boldText[i+1]," E").c_str());
	  				}
	  			} else if(context.boldText[i]=="Elevation"){
	  				info.elevation=atof(context.boldText[i+1].c_str());
	  			}
	  		}
	  	case GET_AEROSOL_VISIBLE:
	  		for(int i=0;i<context.tableText.size();i++){
	  			if(context.tableText[i]==" Morning  "){
	  				aerosols.depthsAM[2]=StringTools::stof(context.tableText[i+1]);
	  				aerosols.depthsAM[3]=StringTools::stof(context.tableText[i+2]);
	  				aerosols.depthsAM[4]=StringTools::stof(context.tableText[i+3]);
	  				aerosols.depthsAM[5]=StringTools::stof(context.tableText[i+4]);
	  				aerosols.depthsAM[6]=StringTools::stof(context.tableText[i+5]);
	  			}
	  			if(context.tableText[i]==" Afternoon  "){
	  				aerosols.depthsPM[2]=StringTools::stof(context.tableText[i+1]);
	  				aerosols.depthsPM[3]=StringTools::stof(context.tableText[i+2]);
	  				aerosols.depthsPM[4]=StringTools::stof(context.tableText[i+3]);
	  				aerosols.depthsPM[5]=StringTools::stof(context.tableText[i+4]);
	  				aerosols.depthsPM[6]=StringTools::stof(context.tableText[i+5]);
	  			}
	  			
			}
			break;
	  	case GET_AEROSOL_UV:
	  		for(int i=0;i<context.tableText.size();i++){
	  			if(context.tableText[i]==" Morning  "){
	  				aerosols.depthsAM[0]=StringTools::stof(context.tableText[i+1]);
	  				aerosols.depthsAM[1]=StringTools::stof(context.tableText[i+2]);
	  			}
	  			if(context.tableText[i]==" Afternoon  "){
	  				aerosols.depthsPM[0]=StringTools::stof(context.tableText[i+1]);
	  				aerosols.depthsPM[1]=StringTools::stof(context.tableText[i+2]);
	  			}
	  			
			}
			break;
		default:
	  		break;
	  }
	}