/*
 * Run libxml2's HTML SAX parser over THIS->input_data using the handler
 * table in THIS->sax.
 *
 * When called with exactly one argument, it must be a string and is used
 * as the encoding name for the in-memory parser; otherwise "utf-8" is used.
 * Any document tree the parser hands back is freed immediately, since only
 * the SAX callbacks matter here.
 *
 * Always pushes the integer 0 as the result.
 */
static void f_parse_html(INT32 args)
{
  xmlDocPtr doc = NULL;
  char *encoding = "utf-8";

  if (args == 1) {
    if (ARG(1).type != T_STRING) {
      /* NOTE(review): message says "argument 0" while the code reads ARG(1)
       * -- confirm which numbering callers expect before rewording. */
      Pike_error("Incorrect type for argument 0: expected string (encoding)\n");
    }
    encoding = ARG(1).u.string->str;
  }

  /* Nothing to parse: push the result and stop here.  Previously control
   * fell through, so the parser still ran on the empty input and a second
   * 0 was pushed. */
  if (THIS->input_data->len == 0) {
    push_int(0);
    return;
  }

  switch (THIS->parsing_method) {
    case PARSE_PUSH_PARSER:
      Pike_error("Push parser not implemented yet. Please bug [email protected] to implement it.");
      /* Presumably Pike_error() does not return; break anyway so this case
       * can never fall through into the memory parser. */
      break;

    case PARSE_MEMORY_PARSER:
      htmlHandleOmittedElem(1);
      doc = htmlSAXParseDoc(THIS->input_data->str, encoding, THIS->sax, NULL);
      break;

    case PARSE_FILE_PARSER:
      htmlHandleOmittedElem(1);
      /* NOTE(review): the caller-supplied encoding is ignored here and
       * "utf-8" is always used -- confirm whether that is intentional. */
      doc = htmlSAXParseFile(THIS->input_data->str, "utf-8", THIS->sax, NULL);
      break;

    default:
      /* Unknown parsing method: nothing is parsed. */
      break;
  }

  if (doc != NULL) {
    xmlFreeDoc(doc);
  }

  push_int(0);
}
/*
 * call-seq:
 *  native_parse_memory(data, encoding)
 *
 * Parse +data+ with +encoding+, driving the SAX handler wrapped by +self+.
 * +self+ is passed to the callbacks as their user data. Returns +data+.
 */
static VALUE native_parse_memory(VALUE self, VALUE data, VALUE encoding)
{
  xmlSAXHandlerPtr handler;

  Data_Get_Struct(self, xmlSAXHandler, handler);

  /* htmlSAXParseDoc() returns the document pointer held by its parser
   * context. Only the callbacks matter here, so release it right away
   * instead of dropping the pointer; xmlFreeDoc() accepts NULL. */
  xmlFreeDoc(
      htmlSAXParseDoc(
          (xmlChar *)StringValuePtr(data),
          (const char *)StringValuePtr(encoding),
          (htmlSAXHandlerPtr)handler,
          (void *)self
      )
  );

  return data;
}
/*
 * call-seq:
 *  native_parse_memory(data, encoding)
 *
 * Run the HTML SAX parser over the string +data+, interpreted using the
 * encoding named by the string +encoding+. The callbacks receive a tuple
 * built from +self+ as their user data. Returns +data+.
 */
static VALUE native_parse_memory(VALUE self, VALUE data, VALUE encoding)
{
  xmlSAXHandlerPtr sax;
  htmlDocPtr parsed;
  xmlChar *input;
  const char *encoding_name;

  Data_Get_Struct(self, xmlSAXHandler, sax);

  input = (xmlChar *)StringValuePtr(data);
  encoding_name = (const char *)StringValuePtr(encoding);

  parsed = htmlSAXParseDoc(
      input,
      encoding_name,
      (htmlSAXHandlerPtr)sax,
      NOKOGIRI_SAX_TUPLE_NEW(NULL, self)
  );

  /* The returned document is not needed once the callbacks have run. */
  xmlFreeDoc(parsed);

  return data;
}
/*
 * Strip HTML markup from a string.
 *
 * If `markup` contains neither a '<' followed later by a '>' nor an '&',
 * it is returned untouched. Otherwise it is run through the HTML SAX
 * parser with only the `characters` callback set (handle_markup_chars),
 * which receives the address of the result pointer as its user data.
 * In that case `markup` is freed and the pointer left behind by the
 * callback is returned; it is still NULL if the callback never set it.
 */
gchar*
feed_remove_markup (gchar* markup)
{
    const xmlChar *input = BAD_CAST markup;
    const xmlChar *tag_open = xmlStrchr (input, '<');
    int has_tag = (tag_open != NULL) && (xmlStrchr (tag_open, '>') != NULL);
    htmlSAXHandlerPtr handler;
    gchar *stripped = NULL;

    /* Plain text without tags or entities needs no processing. */
    if (!has_tag && xmlStrchr (input, '&') == NULL)
        return markup;

    /* Zero-filled handler table: every callback except `characters` is unset. */
    handler = g_new0 (htmlSAXHandler, 1);
    handler->characters = handle_markup_chars;

    htmlSAXParseDoc (BAD_CAST markup, "UTF-8", handler, &stripped);

    g_free (handler);
    g_free (markup);

    return stripped;
}