Ejemplo n.º 1
0
int
html2text(char *text, const char *content)
{
	int ret;
	xmlNodePtr root;
	xmlErrorPtr err;

	htmlParserCtxtPtr parser;

	parser = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL, 0);

/*	htmlCtxtUseOptions(parser, HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET); */
	htmlCtxtUseOptions(parser, HTML_PARSE_RECOVER | HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET);
	
	
	ret = htmlParseChunk(parser, content, xmlStrlen(content), 0);
	if (ret != 0) {
		err = xmlCtxtGetLastError(parser);
		fprintf(stderr, "htmlParseChunk failure: %d: %s\n", \
			ret, err->message);
	}
	
	ret = htmlParseChunk(parser, NULL, 0, 1);
	if (ret != 0) {
		err = xmlCtxtGetLastError(parser);
		fprintf(stderr, "htmlParseChunk failure 2: %d: %s\n", \
			ret, err->message);
	}
	
	root = xmlDocGetRootElement(parser->myDoc);
	walkTree(parser->myDoc, root, text);
	
	return 0;
}
Ejemplo n.º 2
0
    void load(const xmlDocPtr doc, xml_node &node)
    {
        if (!doc)
        {
            std::string msg("XML document not well formed");
            xmlError * error = xmlCtxtGetLastError( ctx_ );
            if (error)
            {
                msg += ":\n";
                msg += error->message;
                throw config_error(msg, error->line, error->file);
            }
            else
            {
                throw config_error(msg);
            }
        }

        int iXIncludeReturn = xmlXIncludeProcessFlags(doc, options_);

        if (iXIncludeReturn < 0)
        {
            xmlFreeDoc(doc);
            throw config_error("XML XInclude error.  One or more files failed to load.");
        }

        xmlNode * root = xmlDocGetRootElement(doc);
        if (!root) {
            xmlFreeDoc(doc);
            throw config_error("XML document is empty.");
        }

        populate_tree(root, node);
        xmlFreeDoc(doc);
    }
Ejemplo n.º 3
0
    void load(std::string const& filename, xml_node &node)
    {
        boost::filesystem::path path(filename);
        if (!boost::filesystem::exists(path))
        {
            throw config_error(string("Could not load map file: File does not exist"), 0, filename);
        }

        xmlDocPtr doc = xmlCtxtReadFile(ctx_, filename.c_str(), encoding_, options_);

        if (!doc)
        {
            xmlError * error = xmlCtxtGetLastError(ctx_);
            if (error)
            {
                std::string msg("XML document not well formed:\n");
                msg += error->message;
                // remove CR
                msg = msg.substr(0, msg.size() - 1);
                throw config_error(msg, error->line, error->file);
            }
        }

        /*
          if ( ! ctx->valid )
          {
            MAPNIK_LOG_WARN(libxml2_loader) << "libxml2_loader: Failed to validate DTD.";
          }
        */
        load(doc, node);
    }
Ejemplo n.º 4
0
/**
 * Feed one chunk of data to the libxml2 push parser.
 *
 * Namespace-URI warnings (XML_WAR_NS_URI, XML_WAR_NS_URI_RELATIVE) are not
 * treated as failures: the stored error is cleared and parsing is reported
 * as successful.
 *
 * @param data chunk of XML text to parse.
 * @return true if the chunk was accepted, false on any other parse error.
 */
bool LibXMLParser::parse(const std::string& data) {
	if (xmlParseChunk(context_, data.c_str(), static_cast<int>(data.size()), false) == XML_ERR_OK) {
		return true;
	}
	xmlError* error = xmlCtxtGetLastError(context_);
	if (!error) {
		// The parser reported a failure but recorded no error details;
		// there is nothing to inspect, so report the failure as-is.
		return false;
	}
	if (error->code == XML_WAR_NS_URI || error->code == XML_WAR_NS_URI_RELATIVE) {
		// Clear the warning so it is not reported again.
		xmlCtxtResetLastError(context_);
		context_->errNo = XML_ERR_OK;
		return true;
	}
	return false;
}
Ejemplo n.º 5
0
/* parse an xml chunk
 *
 * Feeds @size bytes starting at @data to the libxml2 push parser stored in
 * @parser->context.
 *
 * On failure, @err is set to a GST_LIBRARY_ERROR_FAILED error carrying the
 * libxml2 error message (or a generic message when none is available).
 *
 * returns false if the xml is invalid
 */
gboolean
gst_cmml_parser_parse_chunk (GstCmmlParser * parser,
    const gchar * data, guint size, GError ** err)
{
  gint xmlres;

  xmlres = xmlParseChunk (parser->context, data, size, 0);
  if (xmlres != XML_ERR_OK) {
    xmlErrorPtr xml_error = xmlCtxtGetLastError (parser->context);
    const gchar *message = "unknown XML parsing error";

    /* the last error (or its message) may be missing */
    if (xml_error != NULL && xml_error->message != NULL)
      message = xml_error->message;

    /* the chunk is length-delimited, so bound the print by @size instead of
     * relying on a terminating NUL */
    GST_DEBUG ("Error occurred decoding chunk %.*s", (gint) size, data);

    /* never use the parser message as the format string: it can contain
     * '%' sequences taken from the (untrusted) document */
    g_set_error (err,
        GST_LIBRARY_ERROR, GST_LIBRARY_ERROR_FAILED, "%s", message);
    return FALSE;
  }

  return TRUE;
}
Ejemplo n.º 6
0
/**
 * Parses an XML document held in memory and stores the result in the given
 * Document object. Throws XmlError if libxml2 does not return a document.
 *
 * The Document passed in is reset before it is filled. A GlobalLock object
 * is held for the duration of the call.
 *
 * @param pvBuf in: memory buffer to parse.
 * @param cbSize in: size of the memory buffer.
 * @param strFilename in: name of file to parse (passed to libxml2 as the document name).
 * @param doc out: document to be reset and filled with the parsed data.
 */
void XmlMemParser::read(const void* pvBuf, size_t cbSize,
                        const RTCString &strFilename,
                        Document &doc)
{
    GlobalLock lock;
//     global.setExternalEntityLoader(ExternalEntityLoader);

    const char *pcszName = strFilename.c_str();

    doc.m->reset();

    // Parse first, then test the stored result in a separate step.
    doc.m->plibDocument = xmlCtxtReadMemory(m_ctxt,
                                            static_cast<const char *>(pvBuf),
                                            static_cast<int>(cbSize),
                                            pcszName,
                                            NULL,       // encoding = auto
                                            XML_PARSE_NOBLANKS | XML_PARSE_NONET);
    if (!doc.m->plibDocument)
        throw XmlError(xmlCtxtGetLastError(m_ctxt));

    doc.refreshInternals();
}
Ejemplo n.º 7
0
/**
 * Reads the given file and fills the given Document object with its contents.
 * Throws XmlError on parsing errors.
 *
 * The document that is passed in will be reset before being filled if not empty.
 *
 * @param strFilename in: name fo file to parse.
 * @param doc out: document to be reset and filled with data according to file contents.
 */
void XmlFileParser::read(const RTCString &strFilename,
                         Document &doc)
{
    GlobalLock lock;
//     global.setExternalEntityLoader(ExternalEntityLoader);

    m->strXmlFilename = strFilename;
    const char *pcszFilename = strFilename.c_str();

    ReadContext context(pcszFilename);
    doc.m->reset();
    if (!(doc.m->plibDocument = xmlCtxtReadIO(m_ctxt,
                                ReadCallback,
                                CloseCallback,
                                &context,
                                pcszFilename,
                                NULL,       // encoding = auto
                                XML_PARSE_NOBLANKS | XML_PARSE_NONET)))
        throw XmlError(xmlCtxtGetLastError(m_ctxt));

    doc.refreshInternals();
}
Ejemplo n.º 8
0
/*
 * call-seq:
 *  native_write(chunk, last_chunk)
 *
 * Feed +chunk+ to the PushParser. A nil +chunk+ feeds no data. Passing true
 * for +last_chunk+ marks this as the final chunk. Unless the parser context
 * has the RECOVER option set, a parse failure raises the context's last error.
 */
static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
{
  xmlParserCtxtPtr ctx;
  const char *buffer = NULL;
  int length = 0;
  int terminate;
  int status;

  Data_Get_Struct(self, xmlParserCtxt, ctx);

  if (_chunk != Qnil) {
    /* Fetch the pointer before the length, as the original did. */
    buffer = StringValuePtr(_chunk);
    length = RSTRING_LEN(_chunk);
  }

  /* Only a literal true marks the final chunk. */
  terminate = (_last_chunk == Qtrue) ? 1 : 0;

  status = xmlParseChunk(ctx, buffer, length, terminate);
  if (status == 0) {
    return self;
  }

  /* In recover mode parse errors are not raised. */
  if (ctx->options & XML_PARSE_RECOVER) {
    return self;
  }

  Nokogiri_error_raise(NULL, xmlCtxtGetLastError(ctx));

  return self;
}
Ejemplo n.º 9
0
  /**
   * Runs libxml2's SAX push parser over the NUL-terminated buffer @p output.
   * The element, character and CDATA callbacks receive a parser_context
   * that points at this parser and at @p snippets.
   *
   * The parser context is released on every exit path, including when an
   * exception is thrown.
   *
   * @param output       NUL-terminated text to parse.
   * @param snippets     snippet vector made available to the SAX callbacks.
   * @param count_offset initial value stored in _count.
   * @throws sp_exception (WB_ERR_PARSE) if a C++ exception escapes the parse
   *         or libxml2 reports a fatal error. Lower-level errors are only logged.
   */
  void se_parser::parse_output_xml(char *output, std::vector<search_snippet*> *snippets,
                                   const int &count_offset) throw (sp_exception)
  {
    _count = count_offset;

    xmlParserCtxtPtr ctxt = NULL;
    parser_context pc;
    pc._parser = this;
    pc._snippets = snippets;
    pc._current_snippet = NULL;

    // Zero every callback slot, then install only the handlers we use.
    xmlSAXHandler saxHandler = xmlSAXHandler();
    saxHandler.startElement = start_element_wrapper;
    saxHandler.endElement = end_element_wrapper;
    saxHandler.characters = characters_wrapper;
    saxHandler.cdataBlock = cdata_wrapper;

    //mutex_lock(&se_parser::_se_parser_mutex);

    int status = 0;
    try
      {
        ctxt = xmlCreatePushParserCtxt(&saxHandler, &pc, "", 0, "");
        xmlCtxtUseOptions(ctxt,XML_PARSE_NOERROR);

        status = xmlParseChunk(ctxt,output,strlen(output),0);
      }
    catch (const std::exception &e) // by reference: keeps the derived what().
      {
        errlog::log_error(LOG_LEVEL_PARSER,"Error %s in xml/html parsing of search results.",
                          e.what());
        if (ctxt)
          xmlFreeParserCtxt(ctxt);
        //mutex_unlock(&se_parser::_se_parser_mutex);
        throw sp_exception(WB_ERR_PARSE,e.what());
      }
    catch (...) // catch everything else to avoid crashes.
      {
        std::string msg = "Unknown error in xml/html parsing of search results";
        errlog::log_error(LOG_LEVEL_PARSER,msg.c_str());
        if (ctxt)
          xmlFreeParserCtxt(ctxt);
        //mutex_unlock(&se_parser::_se_parser_mutex);
        throw sp_exception(WB_ERR_PARSE,msg);
      }

    if (status != 0) // an error occurred.
      {
        // xep points into ctxt, so it must be fully used before ctxt is freed.
        xmlErrorPtr xep = xmlCtxtGetLastError(ctxt);
        if (xep)
          {
            std::string err_msg = xep->message ? xep->message : "";
            miscutil::replace_in_string(err_msg,"\n","");
            errlog::log_error(LOG_LEVEL_PARSER, "html level parsing error (libxml2): %s",
                              err_msg.c_str());
            // check on error level.
            if (xep->level == XML_ERR_FATAL)
              {
                std::string msg = "libxml2 fatal error";
                errlog::log_error(LOG_LEVEL_PARSER,msg.c_str());
                if (ctxt)
                  xmlFreeParserCtxt(ctxt);
                //mutex_unlock(&se_parser::_se_parser_mutex);
                throw sp_exception(WB_ERR_PARSE,msg);
              }
            // XXX: too verbose, and confusing to users.
            else if (xep->level == XML_ERR_ERROR)
              {
                std::string msg = "libxml2 recoverable error";
                errlog::log_error(LOG_LEVEL_DEBUG,msg.c_str());
                //throw sp_exception(WB_ERR_PARSE,msg);
              }
          }
      }

    // Single release point for every non-throwing path, including warnings
    // and failures for which libxml2 recorded no error.
    if (ctxt)
      xmlFreeParserCtxt(ctxt);
    //mutex_unlock(&se_parser::_se_parser_mutex);
  }
Ejemplo n.º 10
0
/*
 * Driver control callback: execute `command` for the port whose state is
 * `drv_data` and hand the reply back through `*rbuf`.
 *
 * The reply is a driver binary whose first byte is the return code
 * (RET_OK / RET_ERROR, or whatever control() returned for commands handled
 * in 'exmpp_xml.c') followed by the encoded term, if any.
 *
 * Returns the size of the reply binary, or -1 if a buffer could not be
 * allocated or control() failed.
 */
static int
exmpp_xml_control(ErlDrvData drv_data, unsigned int command,
    char *buf, int len, char **rbuf, int rlen)
{
	struct exmpp_xml_data *edd;
	ei_x_buff *to_return;
	ErlDrvBinary *bin;
	int size, ret;

	edd = (struct exmpp_xml_data *)drv_data;
	size = 0;
	bin = NULL;
	to_return = NULL;

	switch (command) {
	/*
	 * Parsing.
	 */

	case COMMAND_PARSE:
	case COMMAND_PARSE_FINAL:
		if (edd->parser == NULL) {
			/* Start a parser. */
			if (create_parser(edd) != 0) {
				to_return = exmpp_new_xbuf();
				if (to_return == NULL)
					return (-1);

				ret = RET_ERROR;
				ei_x_encode_atom(to_return,
				    "parser_setup_failed");

				break;
			}
		}

		/* Control the total size of data to parse. */
		if (!is_data_size_under_limit(&edd->ctx, len)) {
			to_return = exmpp_new_xbuf();
			if (to_return == NULL)
				return (-1);

			ret = RET_ERROR;
			ei_x_encode_atom(to_return, "stanza_too_big");

			break;
		}

		/* Run XML document parsing. */
		ret = xmlParseChunk(edd->parser, buf, len,
		    command == COMMAND_PARSE_FINAL);

		if (ret > 0) {
			xmlError *error;
			long error_code;
			const char *error_msg;

			/* An error occurred during parsing; most probably,
			 * XML wasn't well-formed. */
			error = xmlCtxtGetLastError(edd->parser);

			/* The last error (or its message) may be missing:
			 * fall back to the code returned by xmlParseChunk()
			 * and to an empty message instead of dereferencing
			 * a NULL pointer. */
			error_code = ret;
			error_msg = "";
			if (error != NULL) {
				error_code = error->code;
				if (error->message != NULL)
					error_msg = error->message;
			}

			to_return = exmpp_new_xbuf();
			if (to_return == NULL)
				return (-1);

			ret = RET_ERROR;
			ei_x_encode_tuple_header(to_return, 2);
			ei_x_encode_atom(to_return, "parsing_failed");
			ei_x_encode_tuple_header(to_return, 2);
			ei_x_encode_long(to_return, error_code);
			ei_x_encode_string(to_return, error_msg);

			break;
		}

		/* Return the complete tree(s). */
		ret = RET_OK;
		if (edd->ctx.complete_trees_ready) {
			/* Terminate the complete trees list. */
			ei_x_encode_empty_list(edd->ctx.complete_trees);

			/* This buffer belongs to the context: the binary is
			 * built here so that the generic code below (which
			 * frees `to_return`) is skipped. */
			to_return = edd->ctx.complete_trees;
			size = 1 + to_return->index;
			bin = driver_alloc_binary(size);
			if (bin == NULL)
				return (-1);
			bin->orig_bytes[0] = (char)ret;
			memcpy(bin->orig_bytes + 1,
			    to_return->buff, to_return->index);
		} else {
			/* We need more data to produce a tree. */
			to_return = exmpp_new_xbuf();
			if (to_return == NULL)
				return (-1);

			ei_x_encode_atom(to_return,
			    command == COMMAND_PARSE ? "continue" : "done");
		}

		if (command == COMMAND_PARSE) {
			/* Update the size of processed data. */
			add_data_size(&edd->ctx, len);

			/* Reset the complete trees list. */
			reset_complete_trees(&edd->ctx);
		} else {
			/* We're done with the parser. */
			destroy_parser(edd);
		}

		break;

	case COMMAND_RESET_PARSER:
		if (edd->parser != NULL) {
			xmlCtxtResetPush(edd->parser, NULL, 0, NULL, NULL);
		}
		ret = RET_OK;
		break;

	/*
	 * Misc.
	 */

	case COMMAND_PORT_REVISION:
		/* Store the revision in the buffer. */
		to_return = exmpp_new_xbuf();
		if (to_return == NULL)
			return (-1);

		ret = RET_OK;
		ei_x_encode_string(to_return, "$Revision: 809 $");

		break;

	default:
		/* Other commands are handled in 'exmpp_xml.c' */
		to_return = exmpp_new_xbuf();
		if (to_return == NULL)
			return (-1);

		ret = control(&edd->ctx, command, buf, to_return);
		if (ret < 0) {
			/* Don't leak the reply buffer on failure. */
			exmpp_free_xbuf(to_return);
			return (-1);
		}
	}

	if (bin == NULL) {
		if (to_return != NULL) {
			size = 1 + to_return->index;
			bin = driver_alloc_binary(size);
			if (bin == NULL) {
				/* Don't leak the reply buffer on failure. */
				exmpp_free_xbuf(to_return);
				return (-1);
			}
			bin->orig_bytes[0] = (char)ret;
			if (to_return->index > 0)
				memcpy(bin->orig_bytes + 1,
				    to_return->buff, to_return->index);
			exmpp_free_xbuf(to_return);
		} else {
			/* The command called doesn't return anything. */
			size = 1;
			bin = driver_alloc_binary(size);
			if (bin == NULL)
				return (-1);
			bin->orig_bytes[0] = RET_OK;
		}
	}

	/* Set the returned buffer. */
	*rbuf = (char *)bin;

	/* Return the size of this buffer. */
	return (size);
}
Ejemplo n.º 11
0
Archivo: xlibxml.c Proyecto: GNOME/gxml
/**
 * gxml_context_get_last_error:
 * @ctx: context pointer handed to xmlCtxtGetLastError(); must not be %NULL
 *
 * Thin wrapper around xmlCtxtGetLastError().
 *
 * Returns: the value returned by xmlCtxtGetLastError(), or %NULL if @ctx is %NULL
 *
 * Deprecated: 0.15
 */
xmlErrorPtr gxml_context_get_last_error (void* ctx)
{
  xmlErrorPtr last_error;

  g_return_val_if_fail (ctx != NULL, NULL);

  last_error = xmlCtxtGetLastError (ctx);
  return last_error;
}