EXPORT_C #endif void sami_context_deinit (ParserState * state) { GstSamiContext *context = (GstSamiContext *) state->user_data; if (context) { htmlParserCtxtPtr htmlctxt = context->htmlctxt; /* destroy sax context */ htmlDocPtr doc; htmlParseChunk (htmlctxt, "", 0, 1); doc = htmlctxt->myDoc; htmlFreeParserCtxt (htmlctxt); context->htmlctxt = NULL; if (doc) xmlFreeDoc (doc); g_string_free (context->buf, TRUE); g_string_free (context->rubybuf, TRUE); g_string_free (context->resultbuf, TRUE); g_string_free (context->state, TRUE); g_free (context); state->user_data = NULL; } }
/*
 * _unhtmlize:
 * @string: NUL-terminated text to run through the HTML parser
 * @buffer: passed to the parser as SAX user data
 *
 * Runs @string through a libxml2 HTML push parser whose SAX handler has a
 * single callback installed, unhtmlizeHandleCharacters; every other callback
 * slot is left zeroed. The whole string is supplied as the initial chunk when
 * the parser is created, so the following htmlParseChunk call only has to
 * signal end of input.
 */
static void
_unhtmlize (gchar *string, ResultBuffer *buffer)
{
  htmlSAXHandlerPtr handler = g_new0 (htmlSAXHandler, 1);
  htmlParserCtxtPtr parser;

  handler->characters = unhtmlizeHandleCharacters;

  parser = htmlCreatePushParserCtxt (handler, buffer, string, strlen (string),
      "", XML_CHAR_ENCODING_UTF8);

  /* size 0 with terminate=1: nothing new to feed, just finish the parse */
  htmlParseChunk (parser, string, 0, 1);

  htmlFreeParserCtxt (parser);
  g_free (handler);
}
static void parseHtml(const std::string &html, std::string &title) { htmlParserCtxtPtr ctxt; Context context; ctxt = htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "", XML_CHAR_ENCODING_NONE); htmlParseChunk(ctxt, html.c_str(), html.size(), 0); htmlParseChunk(ctxt, "", 0, 1); htmlFreeParserCtxt(ctxt); title = context.title; }
void parse(const std::string &page) { htmlParserCtxtPtr ctxt; htmlSAXHandler handler; memset(&handler, 0, sizeof(handler)); handler.startElement = static_parser_start_element; handler.endElement = static_parser_end_element; handler.characters = static_parser_characters; ctxt = htmlCreatePushParserCtxt(&handler, this, "", 0, "", XML_CHAR_ENCODING_NONE); htmlParseChunk(ctxt, page.c_str(), page.size(), 0); htmlParseChunk(ctxt, "", 0, 1); htmlFreeParserCtxt(ctxt); }
xmlDoc *XMLDocument::readHTMLDocument(const std::string & htmlCode, const char * encoding, std::string * error) { htmlParserCtxt *ctxt = initHTMLContext(error); htmlDocPtr doc = 0; int options = HTML_PARSE_NOWARNING | HTML_PARSE_NOBLANKS | HTML_PARSE_COMPACT; if (!ctxt) { xmlSetGenericErrorFunc(0, errorFunctionWithoutOutput); return 0; } doc = htmlCtxtReadDoc(ctxt, (const xmlChar *)htmlCode.c_str(), 0, encoding, options); if (!doc || !ctxt->valid) { *error = errorBuffer; } xmlSetGenericErrorFunc(0, errorFunctionWithoutOutput); htmlFreeParserCtxt(ctxt); return (xmlDoc *)doc; }
/*
 * html_file_open:
 * @fo: file opener (unused)
 * @io_context: receives the error when the input cannot be parsed
 * @wb_view: workbook view handed on to html_search_for_tables
 * @input: stream containing the HTML; must not be NULL
 *
 * Reads @input from the start, sniffs the character encoding from the first
 * four bytes, feeds the rest to a libxml2 HTML push parser in chunks of at
 * most 4096 bytes and then walks the top-level nodes of the resulting
 * document with html_search_for_tables.
 *
 * If no document is produced (input shorter than four bytes, a failed first
 * read, a parser context that could not be created, or a parse that yields no
 * document) an "Unable to parse the html." error is set on @io_context.
 * A failed seek returns silently without setting an error.
 */
void
html_file_open (G_GNUC_UNUSED GOFileOpener const *fo, GOIOContext *io_context,
		WorkbookView *wb_view, GsfInput *input)
{
	guint8 const *buf;
	gsf_off_t size;
	int len, bomlen;
	htmlParserCtxtPtr ctxt;
	htmlDocPtr doc = NULL;
	xmlCharEncoding enc;
	GnmHtmlTableCtxt tc;

	g_return_if_fail (input != NULL);

	if (gsf_input_seek (input, 0, G_SEEK_SET))
		return;

	size = gsf_input_size (input);
	if (size >= 4) {
		/* The first four bytes are consumed here for encoding detection. */
		size -= 4;
		buf = gsf_input_read (input, 4, NULL);
		if (buf != NULL) {
			enc = xmlDetectCharEncoding(buf, 4);
			switch (enc) {	/* Skip byte order mark */
			case XML_CHAR_ENCODING_UCS4BE:
			case XML_CHAR_ENCODING_UCS4LE:
			case XML_CHAR_ENCODING_UCS4_2143:
			case XML_CHAR_ENCODING_UCS4_3412:
			case XML_CHAR_ENCODING_EBCDIC:
				bomlen = 4;
				break;
			case XML_CHAR_ENCODING_UTF16BE:
			case XML_CHAR_ENCODING_UTF16LE:
				bomlen = 2;
				break;
			case XML_CHAR_ENCODING_UTF8:
				if (buf[0] == 0xef)
					bomlen = 3;
				/* NOTE(review): a leading '<' (0x3c) drops all four
				 * sniffed bytes, not just a BOM -- confirm this is
				 * intended. */
				else if (buf[0] == 0x3c)
					bomlen = 4;
				else
					bomlen = 0;
				break;
			case XML_CHAR_ENCODING_NONE:
				bomlen = 0;
				/* Try to detect unmarked UTF16LE
				   (Firefox Windows clipboard, drag data all platforms) */
				if ((buf[0] >= 0x20 || g_ascii_isspace(buf[0])) &&
				    buf[1] == 0 &&
				    (buf[2] >= 0x20 || g_ascii_isspace(buf[2])) &&
				    buf[3] == 0)
					enc = XML_CHAR_ENCODING_UTF16LE;
				break;
			default:
				bomlen = 0;
			}

			/* Whatever follows the BOM in the sniffed bytes becomes
			 * the parser's initial chunk. */
			ctxt = htmlCreatePushParserCtxt (
				NULL, NULL, (char const *)(buf + bomlen),
				4 - bomlen, gsf_input_name (input), enc);

			/* Creation can fail; without this guard the ctxt->myDoc
			 * read below would dereference NULL. Leaving doc NULL
			 * routes the failure to the normal error report. */
			if (ctxt != NULL) {
				for (; size > 0 ; size -= len) {
					len = MIN (4096, size);
					buf = gsf_input_read (input, len, NULL);
					if (buf == NULL)
						break;
					htmlParseChunk (
						ctxt, (char const *)buf, len, 0);
				}

				/* Zero-length chunk with terminate=1 ends the parse. */
				htmlParseChunk (ctxt, (char const *)buf, 0, 1);
				doc = ctxt->myDoc;
				htmlFreeParserCtxt (ctxt);
			}
		}
	}

	if (doc != NULL) {
		xmlNodePtr ptr;
		tc.sheet = NULL;
		tc.row   = -1;
		tc.wb_view = wb_view;
		for (ptr = doc->children; ptr != NULL ; ptr = ptr->next)
			html_search_for_tables (ptr, doc, wb_view, &tc);
		xmlFreeDoc (doc);
	} else
		go_io_error_info_set (io_context,
			go_error_info_new_str (_("Unable to parse the html.")));
}
void HttpPullData::parseHtml(const std::string &html,int pos,PARSE_ACTION action){ htmlParserCtxtPtr ctxt; Context context; ctxt = htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "",XML_CHAR_ENCODING_NONE); htmlParseChunk(ctxt,html.c_str()+pos,max((unsigned long)0,html.size()-pos), 0); htmlParseChunk(ctxt, "", 0, 1); htmlFreeParserCtxt(ctxt); switch(action){ case GET_OPTIONS: options=context.options; break; case GET_OZONE: ozone=-1; for(unsigned int i=0;i<context.tableText.size();i++){ if(context.tableText[i].find(" DU",0)>=0){ ozone=atof(StringTools::stripString(context.tableText[i]," DU").c_str()); break; } } break; case GET_LOCATION: for(int i=0;i<context.boldText.size();i++){ if(context.boldText[i]=="Ecoregion "){ info.ecoregion=context.boldText[i+1]; } else if(context.boldText[i]=="Latitude"){ info.latitude=atof(StringTools::stripString(context.boldText[i+1]," N").c_str()); } else if(context.boldText[i]=="Longitude"){ if((int)context.boldText[i+1].find(" W")>=0){ info.longitude=atof(StringTools::stripString(context.boldText[i+1]," W").c_str()); } else if((int)context.boldText[i+1].find(" E")>=0){ info.longitude=-atof(StringTools::stripString(context.boldText[i+1]," E").c_str()); } } else if(context.boldText[i]=="Elevation"){ info.elevation=atof(context.boldText[i+1].c_str()); } } case GET_AEROSOL_VISIBLE: for(int i=0;i<context.tableText.size();i++){ if(context.tableText[i]==" Morning "){ aerosols.depthsAM[2]=StringTools::stof(context.tableText[i+1]); aerosols.depthsAM[3]=StringTools::stof(context.tableText[i+2]); aerosols.depthsAM[4]=StringTools::stof(context.tableText[i+3]); aerosols.depthsAM[5]=StringTools::stof(context.tableText[i+4]); aerosols.depthsAM[6]=StringTools::stof(context.tableText[i+5]); } if(context.tableText[i]==" Afternoon "){ aerosols.depthsPM[2]=StringTools::stof(context.tableText[i+1]); aerosols.depthsPM[3]=StringTools::stof(context.tableText[i+2]); aerosols.depthsPM[4]=StringTools::stof(context.tableText[i+3]); 
aerosols.depthsPM[5]=StringTools::stof(context.tableText[i+4]); aerosols.depthsPM[6]=StringTools::stof(context.tableText[i+5]); } } break; case GET_AEROSOL_UV: for(int i=0;i<context.tableText.size();i++){ if(context.tableText[i]==" Morning "){ aerosols.depthsAM[0]=StringTools::stof(context.tableText[i+1]); aerosols.depthsAM[1]=StringTools::stof(context.tableText[i+2]); } if(context.tableText[i]==" Afternoon "){ aerosols.depthsPM[0]=StringTools::stof(context.tableText[i+1]); aerosols.depthsPM[1]=StringTools::stof(context.tableText[i+2]); } } break; default: break; } }