Example #1
0
/*
 * Run the HTML SAX parser over THIS->input_data, using THIS->sax as the
 * handler, and push integer 0 as the result.
 *
 * When called with exactly one argument it must be a string; its contents
 * are used as the encoding name for the in-memory parser. Without an
 * argument the encoding defaults to "utf-8".
 *
 * Any document returned by libxml2 is freed before returning.
 */
static void f_parse_html(INT32 args)
{
  xmlDocPtr doc = NULL;
  char *encoding = "utf-8";

  if ( args == 1 ) {
    /* NOTE(review): the message says "argument 0" while the code reads
     * ARG(1) -- confirm which index convention ARG() uses. */
    if ( ARG(1).type != T_STRING )
      Pike_error("Incorrect type for argument 0: expected string (encoding)\n");
    encoding = ARG(1).u.string->str;
  }

  /* Nothing to parse: push the result and stop here. Without the return
   * the code below would still run the parser and push a second 0. */
  if ( THIS->input_data->len == 0 ) {
    push_int(0);
    return;
  }

  switch (THIS->parsing_method) {
    case PARSE_PUSH_PARSER:
      Pike_error("Push parser not implemented yet. Please bug [email protected] to implement it.");
      /* Presumably Pike_error does not return; the break guards against
       * falling into the memory parser if it ever does. */
      break;

    case PARSE_MEMORY_PARSER:
      htmlHandleOmittedElem(1);
      doc = htmlSAXParseDoc(THIS->input_data->str, encoding, THIS->sax, NULL);
      break;

    case PARSE_FILE_PARSER:
      htmlHandleOmittedElem(1);
      /* NOTE(review): the file parser always uses "utf-8" and ignores the
       * caller-supplied encoding -- confirm whether that is intended. */
      doc = htmlSAXParseFile(THIS->input_data->str, "utf-8", THIS->sax, NULL);
      break;

    default:
      /* Unknown parsing method: nothing is parsed, doc stays NULL. */
      break;
  }

  if ( doc != NULL )
    xmlFreeDoc(doc);

  push_int(0);
}
Example #2
0
/*
 * call-seq:
 *  native_parse_memory(data, encoding)
 *
 * Run the HTML SAX parser over +data+ using +encoding+. The SAX handler is
 * the xmlSAXHandler wrapped by +self+, and +self+ is handed to the parser
 * as the user-data pointer. Returns +data+.
 */
static VALUE native_parse_memory(VALUE self, VALUE data, VALUE encoding)
{
    xmlSAXHandlerPtr handler;
    htmlDocPtr doc;

    Data_Get_Struct(self, xmlSAXHandler, handler);
    doc = htmlSAXParseDoc(  (xmlChar *)StringValuePtr(data),
                            (const char *)StringValuePtr(encoding),
                            (htmlSAXHandlerPtr)handler,
                            (void *)self );
    /* htmlSAXParseDoc returns a document pointer; release it instead of
     * dropping it so a returned tree is not leaked. */
    if (doc != NULL)
        xmlFreeDoc(doc);
    return data;
}
Example #3
0
/*
 * call-seq:
 *  native_parse_memory(data, encoding)
 *
 * Feed +data+ to the HTML SAX parser, interpreting it as +encoding+.
 * The handler is the xmlSAXHandler wrapped by +self+; the parser's
 * user-data argument is NOKOGIRI_SAX_TUPLE_NEW(NULL, self).
 * Any document returned by the parser is freed. Returns +data+.
 */
static VALUE native_parse_memory(VALUE self, VALUE data, VALUE encoding)
{
  xmlSAXHandlerPtr sax_handler;
  htmlDocPtr parsed;

  Data_Get_Struct(self, xmlSAXHandler, sax_handler);

  parsed = htmlSAXParseDoc(
    (xmlChar *)StringValuePtr(data),
    (const char *)StringValuePtr(encoding),
    (htmlSAXHandlerPtr)sax_handler,
    NOKOGIRI_SAX_TUPLE_NEW(NULL, self)
  );
  xmlFreeDoc(parsed);

  return data;
}
Example #4
0
/*
 * Strip markup from a string by running it through the HTML SAX parser.
 *
 * The string is parsed only if it contains a '<' that is followed somewhere
 * by a '>', or if it contains a '&'. In that case a SAX handler with only
 * the characters callback (handle_markup_chars) is used, with the address
 * of the result pointer passed as user data; the input string is then freed
 * and the collected text is returned. The result starts out NULL, so NULL
 * is returned if the callback never assigns it.
 *
 * If no parsing is needed, markup itself is returned unchanged.
 *
 * Ownership: the caller hands over markup; either the same pointer or a
 * replacement comes back, and the caller must not use the old pointer
 * after the call.
 */
gchar*
feed_remove_markup (gchar* markup)
{
    const xmlChar* stag;

    if (((stag = xmlStrchr (BAD_CAST markup, '<')) && xmlStrchr (stag, '>')) ||
         xmlStrchr (BAD_CAST markup, '&'))
    {
        gchar* text = NULL;
        htmlSAXHandlerPtr psax;
        htmlDocPtr doc;

        psax = g_new0 (htmlSAXHandler, 1);
        psax->characters = handle_markup_chars;
        doc = htmlSAXParseDoc (BAD_CAST markup, "UTF-8", psax, &text);
        /* The parser returns a document pointer; free it if one came back
         * rather than silently dropping it. */
        if (doc != NULL)
            xmlFreeDoc (doc);
        g_free (psax);
        g_free (markup);
        return text;
    }
    return markup;
}