示例#1
0
/*
 * Entry point: parse HTML text with tidy, then walk the tidy tree with
 * convertNode to build edbrowse nodes.
 *
 * htmltext:  the HTML source to parse.
 * startpage: when false, tidy's TidyBodyOnly option is switched on.
 *
 * The tidy document is stored in `tdoc` (declared outside this function);
 * it is created here and released again before returning.
 */
void html2nodes(const char *htmltext, bool startpage)
{
	char *preprocessed;

	tdoc = tidyCreate();
	if (!startpage)
		tidyOptSetInt(tdoc, TidyBodyOnly, yes);
	tidySetReportFilter(tdoc, tidyErrorHandler);
	tidySetCharEncoding(tdoc, (cons_utf8 ? "utf8" : "latin1"));

	/* Parse the preprocessed copy when tidyPreprocess() returns one (and
	 * free it afterwards); otherwise parse the caller's text directly. */
	preprocessed = tidyPreprocess(htmltext);
	if (!preprocessed) {
		tidyParseString(tdoc, htmltext);
	} else {
		tidyParseString(tdoc, preprocessed);
		nzFree(preprocessed);
	}

	tidyCleanAndRepair(tdoc);

	/* At debug level 5 and above, first traverse the tree with printNode. */
	if (debugLevel >= 5) {
		traverse_tidycall = printNode;
		traverseTidy();
	}

	/* Convert tidy nodes into edbrowse nodes. */
	traverse_tidycall = convertNode;
	traverseTidy();

	tidyRelease(tdoc);
}				/* html2nodes */
示例#2
0
文件: tdoc.c 项目: nuxlli/wax
// Lua binding: feeds the string at stack index 2 to tidyParseString() on the
// tidy document of the object at stack index 1, and pushes the status code
// returned by tidy as the single Lua result.
int lua_tidy_parseString( lua_State *L)
{
    pTidy self = toTidy(L,1);
    const char * html = lua_tostring(L,2);
    int status = tidyParseString(self->tdoc, html);

    lua_pushnumber(L, status);
    return 1;
}
int main(int argc, char *argv[]) {
	CURL *curl;
	int counter = 0;
	if (argc < 2) return 1;

	curl = curl_easy_init();
	TidyDoc tdoc = tidyCreate();
	TidyBuffer output = {0};
	tidyOptSetBool(tdoc, TidyXmlOut, yes);
	tidyOptSetBool(tdoc, TidyShowWarnings, no);
	tidyOptSetInt(tdoc, TidyWrapLen, 0);

	for(int i=0; i < 20; i++) {
//		tidyBufFree(&output);
		tidyBufClear(&output);
	//	tidyParseFile(tdoc, argv[1]);
		tidyParseString(tdoc, getpage(curl,i).c_str());
		tidySaveBuffer(tdoc, &output); 
	//	tidySaveFile(tdoc, "tidy_test.xml");

	//	doc.LoadFile(argv[1]);
	//	doc.LoadFile("tidy_test.xml");
		parseTidyBuf(output, counter);
	}

	curl_easy_cleanup(curl);

	return 0;
}
示例#4
0
文件: util.cpp 项目: msjoberg/pumpa
// Cleans up an HTML string with libtidy and returns it as XHTML.
//
// str: the markup to clean, passed to tidy as UTF-8.
// ok:  set to true when tidy ran through every step and produced non-empty
//      output, false otherwise.
//
// Returns the tidied markup, trimmed. When any step fails, the input string
// is returned (trimmed) instead. When built with NO_TIDY the input is
// returned unchanged and ok is always true.
QString tidyHtml(QString str, bool& ok) {
#ifdef NO_TIDY
  ok = true;
  return str;
#else
  QString cleaned = str;
  ok = false;

  // Evaluated once, on the first call: selects whether TidyBodyOnly is set
  // through the integer or the boolean option setter.
  static bool bodyOnlyIsInt = isTidyWithIntBodyOnlyCheck();

  TidyDoc doc = tidyCreate();
  TidyBuffer outBuf;
  TidyBuffer errBuf;

  tidyBufInit(&outBuf);
  tidyBufInit(&errBuf);

  // Each step only runs while every earlier step has succeeded.
  bool good = tidyOptSetBool(doc, TidyXhtmlOut, yes);
  if (good)
    good = tidyOptSetBool(doc, TidyForceOutput, yes);
  if (good)
    good = tidyOptSetBool(doc, TidyMark, no);
  if (good) {
    if (bodyOnlyIsInt)
      good = tidyOptSetInt(doc, TidyBodyOnly, 1);
    else
      good = tidyOptSetBool(doc, TidyBodyOnly, yes);
  }
  if (good)
    good = tidyOptSetInt(doc, TidyWrapLen, 0);
  if (good)
    good = tidyOptSetInt(doc, TidyDoctypeMode, TidyDoctypeOmit);

  if (good)
    good = (tidySetCharEncoding(doc, "utf8") >= 0);
  if (good)
    good = (tidySetErrorBuffer(doc, &errBuf) >= 0);
  if (good)
    good = (tidyParseString(doc, str.toUtf8().data()) >= 0);
  if (good)
    good = (tidyCleanAndRepair(doc) >= 0);
  if (good)
    good = (tidyRunDiagnostics(doc) >= 0);
  if (good)
    good = (tidySaveBuffer(doc, &outBuf) >= 0);
  if (good)
    good = (outBuf.bp != 0 && outBuf.size > 0);

  if (good) {
    cleaned = QString::fromUtf8((char*)outBuf.bp, outBuf.size);
    ok = true;
  }

#ifdef DEBUG_MARKUP
  if (errBuf.size > 0) {
    QString errStr =  QString::fromUtf8((char*)errBuf.bp, errBuf.size);
    qDebug() << "\n[DEBUG] MARKUP, libtidy errors and warnings:\n" << errStr;
  }
#endif

  if (outBuf.bp != 0)
    tidyBufFree(&outBuf);
  if (errBuf.bp != 0)
    tidyBufFree(&errBuf);
  tidyRelease(doc);

  return cleaned.trimmed();
#endif
}
示例#5
0
// Parses the HTML in `x` with the tidy document `tdoc` (declared outside
// this function), runs clean-and-repair and diagnostics, and saves the
// result to the file named by `path`.
// The status codes returned by the tidy calls are not inspected.
void tidyhtml::parse(std::string x, std::string path)
{
        const char *html = x.c_str();
        const char *outFile = path.c_str();

        // NOTE(review): the error buffer is set to NULL here; confirm that
        // the libtidy version in use accepts a NULL buffer.
        tidySetErrorBuffer(tdoc, NULL);

        tidyParseString(tdoc, html);
        tidyCleanAndRepair(tdoc);
        tidyRunDiagnostics(tdoc);
        tidySaveFile(tdoc, outFile);
}
示例#6
0
/**
 * Runs an HTML document through libtidy and returns the result serialized
 * as XML (numeric entities, warnings suppressed, output forced).
 *
 * @param html  the markup to clean.
 * @return the tidied markup; empty when tidy wrote no output.
 * @throws const char*  when configuring tidy or any tidy step fails. The
 *         message includes tidy's response code. It points into
 *         thread-local storage and stays valid until the next failing call
 *         on the same thread.
 */
std::string cleanHTML (std::string html)
{
    TidyDoc tidyDoc = tidyCreate();
    TidyBuffer tidyOutputBuffer = {0};

    // Configure Tidy: XML output, quiet, numeric entities, no warnings.
    bool configSuccess = tidyOptSetBool(tidyDoc, TidyXmlOut, yes)
        && tidyOptSetBool(tidyDoc, TidyQuiet, yes)
        && tidyOptSetBool(tidyDoc, TidyNumEntities, yes)
        && tidyOptSetBool(tidyDoc, TidyShowWarnings, no);

    tidyOptSetValue(tidyDoc, TidyForceOutput, "true");

    // Stays at -1 when configuration failed, so the error path below is taken.
    int tidyResponseCode = -1;

    // Parse input
    if (configSuccess)
        tidyResponseCode = tidyParseString(tidyDoc, html.c_str());

    // Process HTML
    if (tidyResponseCode >= 0)
        tidyResponseCode = tidyCleanAndRepair(tidyDoc);

    // Output the HTML to our buffer
    if (tidyResponseCode >= 0)
        tidyResponseCode = tidySaveBuffer(tidyDoc, &tidyOutputBuffer);

    // Copy the result out before releasing tidy's memory. The buffer pointer
    // is still NULL when nothing was written, and std::string must not be
    // built from a NULL pointer.
    std::string tidyResult;
    if (tidyResponseCode >= 0 && tidyOutputBuffer.bp != NULL)
        tidyResult.assign(reinterpret_cast<const char*>(tidyOutputBuffer.bp),
                          tidyOutputBuffer.size);

    // Free tidy's memory on both the success and the failure path.
    tidyBufFree(&tidyOutputBuffer);
    tidyRelease(tidyDoc);

    // Any errors from Tidy?
    if (tidyResponseCode < 0)
    {
        // The original wrote `"literal" + tidyResponseCode`, which is pointer
        // arithmetic with a negative offset, not concatenation. Build the
        // text properly and keep throwing a const char* so existing handlers
        // still match.
        static thread_local std::string message;
        message = "Tidy encountered an error while parsing an HTML response. Tidy response code: "
            + std::to_string(tidyResponseCode);
        throw message.c_str();
    }

    return tidyResult;
}
示例#7
0
/* Run HTML Tidy over the string held in the Perl scalar "html" using
   the tidy document in "htv", and hand back two new Perl scalars:
   "* output_ptr" receives the tidied document and "* errors_ptr"
   receives tidy's diagnostics. Both are first set to "PL_sv_undef"
   and are only replaced once every tidy step has been executed.
   Returns "html_valid_ok" at the end of the function.

   NOTE(review): CALL_TIDY is a macro defined outside this function;
   if it returns early on failure, the two tidy buffers below would
   not be freed on that path - confirm against its definition. */
static html_valid_status_t
html_valid_run (html_valid_t * htv, SV * html,
		SV ** output_ptr, SV ** errors_ptr)
{
    const char * html_string;
    STRLEN html_length;
    SV * output;
    SV * errors;

    /* Zero-initialised buffers for tidy's output and diagnostics. */
    TidyBuffer tidy_output = {0};
    TidyBuffer tidy_errbuf = {0};

    /* First set these up sanely in case the stuff hits the fan. */

    * output_ptr = & PL_sv_undef;
    * errors_ptr = & PL_sv_undef;

    /* Work around bug where allocator sometimes does not get set. */

    CopyAllocator (htv->tdoc, & tidy_output);
    CopyAllocator (htv->tdoc, & tidy_errbuf);

    /* Get the string and its length out of the Perl scalar. Only the
       pointer is passed to tidy below; "html_length" is not used
       again in this function. */
    html_string = SvPV (html, html_length);
    CALL_TIDY (tidySetErrorBuffer (htv->tdoc, & tidy_errbuf));
    /* The counter is bumped once per tidy buffer put into use and
       decremented again after the matching tidyBufFree below. */
    htv->n_mallocs++;
    CALL_TIDY (tidyParseString (htv->tdoc, html_string));
    CALL_TIDY (tidyCleanAndRepair (htv->tdoc));
    CALL_TIDY (tidyRunDiagnostics (htv->tdoc));
    CALL_TIDY (tidySaveBuffer (htv->tdoc, & tidy_output));
    htv->n_mallocs++;

    /* Copy the contents of the buffers into the Perl scalars.
       NOTE(review): per perlapi, newSVpv computes the length with
       strlen when the length argument is 0 - confirm that this is
       what is wanted for an empty buffer. */

    output = newSVpv ((char *) tidy_output.bp, tidy_output.size);
    errors = newSVpv ((char *) tidy_errbuf.bp, tidy_errbuf.size);

    /* HTML Tidy randomly segfaults here due to "allocator" not being
       set in some cases, hence the above CopyAllocator fix. */

    tidyBufFree (& tidy_output);
    htv->n_mallocs--;
    tidyBufFree (& tidy_errbuf);
    htv->n_mallocs--;

    /* These are not our mallocs, they are Perl's mallocs, so we don't
       increase htv->n_mallocs for these. After we return them, we no
       longer take care of these. */
    * output_ptr = output;
    * errors_ptr = errors;
    return html_valid_ok;
}
void TidyNetworkReply::tidyUp() {
    QUrl redirect = reply->attribute(QNetworkRequest::RedirectionTargetAttribute).toUrl();
    if (redirect.isValid()) {
        redirect.setScheme("tidy");
        setAttribute(QNetworkRequest::RedirectionTargetAttribute, QVariant(redirect));

        emit finished();
        reply->deleteLater();
        return;
    }

    int rc = -1;
    Bool ok;

    ok = tidyOptSetBool( tdoc, TidyXmlOut, yes );  // Convert to XHTML
    if (ok)
        ok = tidyOptSetBool(tdoc, TidyQuoteNbsp, no);
    //if (ok)
    //ok = tidyOptSetValue(tdoc, TidyBlockTags, "header,nav,article,time,section,footer");
    if ( ok )
        rc = tidySetErrorBuffer( tdoc, &errbuf );      // Capture diagnostics
    if ( rc >= 0 )
        rc = tidyParseString( tdoc, reply->readAll() );           // Parse the input
    if ( rc >= 0 )
        rc = tidyCleanAndRepair( tdoc );               // Tidy it up!
    if ( rc >= 0 )
        rc = tidyRunDiagnostics( tdoc );               // Kvetch
    if ( rc > 1 )                                    // If error, force output.
        rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
    if ( rc >= 0 )
        rc = tidySaveBuffer( tdoc, &output );          // Pretty Print

    if ( rc >= 0 ) {
        if ( rc > 0 ) {
            ;//printf( "\nDiagnostics:\n\n%s", errbuf.bp );
        }
    } else {
        ;//printf( "A severe error (%d) occurred.\n", rc );
    }

    open(ReadOnly);
    emit readyRead();
    emit finished();

    reply->deleteLater();
    //QTimer::singleShot(0, this, SIGNAL(readyRead()));
    //QTimer::singleShot(0, this, SIGNAL(finished()));
}
示例#9
0
// Converts `document` to XHTML with libtidy (through `handle`) and appends
// the serialized result to `resultDocument`. All three are declared outside
// this function.
//
// When tidy reports warnings or errors, its diagnostics are written to
// std::clog. Documents with errors are still serialized by switching on
// TidyForceOutput.
//
// Throws std::runtime_error when any tidy step fails with a negative
// status. The message carries that status.
void HTMLTidy::run() throw( std::runtime_error ) {
	TidyBuffer outputBuffer = { 0 };
	TidyBuffer errorBuffer = { 0 };
	// try to create valid XHTML document for XML parser:
	int tidyResult = -1;
	if( tidyOptSetBool( handle, TidyXhtmlOut, yes ) ) {
		tidyResult = tidySetErrorBuffer( handle, &errorBuffer );
	}
	if( tidyResult >= 0 ) {
		tidyResult = tidyParseString( handle, document.c_str() );
	}
	if( tidyResult >= 0 ) {
		tidyResult = tidyCleanAndRepair( handle );
	}
	if( tidyResult >= 0 ) {
		tidyResult = tidyRunDiagnostics( handle );
	}
	// A status above 1 means errors were found: force output regardless.
	if( tidyResult > 1 ) {
		if( !tidyOptSetBool( handle, TidyForceOutput, yes ) ) {
			tidyResult = -1;
		}
	}
	if( tidyResult >= 0 ) {
		tidyResult = tidySaveBuffer( handle, &outputBuffer );
	}

	if( tidyResult < 0 ) {
		// Release both buffers before throwing; the original leaked them
		// on this path.
		tidyBufFree( &outputBuffer );
		tidyBufFree( &errorBuffer );
		std::stringstream sstrTidyResult;
		sstrTidyResult << tidyResult;
		throw std::runtime_error( "HTMLTidy: A severe error occured while tidying up the received document ("
		                          + sstrTidyResult.str()
		                          + ")."
		                        );
	}

	if( tidyResult > 0 ) {
		std::clog << "*********************************" << std::endl;
		std::clog << "HTMLTidy: Diagnostics of libtidy:" << std::endl;
		// The buffer pointer is NULL while nothing has been written to it.
		if( errorBuffer.bp ) {
			std::clog << errorBuffer.bp;
		}
		std::clog << "*********************************" << std::endl;
	}

	// Append the whole output in one call rather than one character at a time.
	if( outputBuffer.bp ) {
		resultDocument.append( reinterpret_cast< const char* >( outputBuffer.bp ), outputBuffer.size );
	}
	tidyBufFree( &outputBuffer );
	tidyBufFree( &errorBuffer );
}
示例#10
0
// Creates a tidy document for `html`, configures it (XML output, UTF-8
// encoding, LF newlines, no quoting of non-breaking spaces, forced output),
// routes diagnostics into m_errorOutput, then parses the input and runs
// clean-and-repair. Status codes of the tidy calls are not inspected.
HtmlTidy::HtmlTidy(const QString& html)
    : m_tidyDoc(tidyCreate()),
      m_errorOutput(),
      m_output(),
      m_input(html)
{
    // Output configuration.
    tidyOptSetBool (m_tidyDoc, TidyXmlOut,       yes);
    tidyOptSetValue(m_tidyDoc, TidyCharEncoding, "utf8");
    tidyOptSetInt  (m_tidyDoc, TidyNewline,      TidyLF);
    tidyOptSetBool (m_tidyDoc, TidyQuoteNbsp,    no);
    tidyOptSetBool (m_tidyDoc, TidyForceOutput,  yes);

    // Collect diagnostics in the member buffer.
    tidySetErrorBuffer(m_tidyDoc, &m_errorOutput);

    // Tidy receives the stored input encoded as UTF-8.
    const QByteArray utf8Input = m_input.toUtf8();
    tidyParseString(m_tidyDoc, utf8Input.constData());
    tidyCleanAndRepair(m_tidyDoc);
}
示例#11
0
// Takes HTML code and returns it converted to XHTML by libtidy.
//
// input: the HTML markup; it is passed to tidy encoded as UTF-8.
//
// Returns the XHTML produced by tidy. The result is empty when a tidy step
// failed (an error line is then printed to stdout) or when tidy wrote no
// output. Every "&Atilde;&para;" sequence in the result is replaced with
// "&ouml;".
QString tidy(QString input)
{
  // the following code is (c) Charles Reitzel and Dave Raggett, see the package tidy
  TidyBuffer output = {0};
  TidyBuffer errbuf = {0};
  QString result;
  int rc = -1;

  TidyDoc tdoc = tidyCreate();                             // Initialize "document"
  Bool ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes );     // Convert to XHTML
  if ( ok ) rc = tidySetErrorBuffer( tdoc, &errbuf );      // Capture diagnostics
  tidySetCharEncoding( tdoc, "utf8" );
  if ( rc >= 0 ) rc = tidyParseString( tdoc, input.toUtf8().constData() );      // Parse the input
  if ( rc >= 0 ) rc = tidyCleanAndRepair( tdoc );          // Tidy it up!
  if ( rc >= 0 ) rc = tidyRunDiagnostics( tdoc );          // Kvetch
  if ( rc > 1 )                                            // If error, force output.
    rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
  if ( rc >= 0 ) rc = tidySaveBuffer( tdoc, &output );     // Pretty Print
  if ( rc >= 0 )
  {
    // Convert straight from tidy's buffer. The original copied it through a
    // malloc'ed block that was never freed, and its snprintf size dropped
    // the last character and left a NUL in the result. The buffer pointer
    // is NULL when tidy wrote nothing.
    if ( output.bp )
      result = QString::fromUtf8( reinterpret_cast<const char*>(output.bp),
                                  static_cast<int>(output.size) );
  }
  else
    printf( "A severe error (%d) occurred.\n", rc );
  tidyBufFree( &output );
  tidyBufFree( &errbuf );
  tidyRelease( tdoc );
  result=result.replace("&Atilde;&para;","&ouml;");
  return result;
}
示例#12
0
// Tidies pSourceIn with libtidy after applying the options in pOptions.
//
// pSourceIn: markup to tidy.
// pOptions:  option text handed to TidyOptionsSet().
// strOut:    receives the tidied markup when every tidy step succeeded and
//            produced output; left untouched otherwise.
// strErr:    receives tidy's diagnostics; cleared when there are none or
//            when they consist solely of tidy's "No warnings or errors were
//            found." message.
//
// Always returns true, including when a tidy step fails; callers have to
// look at strOut and strErr to see what happened.
bool CCFHtmlTidy::TidyMain(const char* pSourceIn, const char* pOptions, std::string &strOut, std::string &strErr)
{
	TidyBuffer output;
	TidyBuffer errbuf;

	TidyDoc tdoc = tidyCreate();                     // Initialize "document"
	tidyBufInit(&output);
	tidyBufInit(&errbuf);

	TidyOptionsSet(tidyDocToImpl(tdoc), pOptions);

	int rc = tidySetErrorBuffer(tdoc, &errbuf);      // Capture diagnostics
	if (rc >= 0)
		rc = tidyParseString(tdoc, pSourceIn);       // Parse the input
	if (rc >= 0)
		rc = tidyCleanAndRepair(tdoc);               // Tidy it up!
	if (rc >= 0)
		rc = tidyRunDiagnostics(tdoc);               // Kvetch
	if (rc >= 0)
		rc = tidySaveBuffer(tdoc, &output);          // Pretty Print

	if (rc >= 0 && output.bp)
	{
		strOut = reinterpret_cast<char const*>(output.bp);
	}

	// A tidy buffer's pointer stays NULL until something is written to it,
	// and std::string must not be assigned from a NULL pointer.
	if (errbuf.bp)
	{
		strErr = reinterpret_cast<char const*>(errbuf.bp);
	}
	else
	{
		strErr.clear();
	}
	if (strErr == "No warnings or errors were found.\n\n")
	{
		strErr.clear();
	}

	tidyBufFree(&output);
	tidyBufFree(&errbuf);
	tidyRelease(tdoc);
	return true;
}
示例#13
0
	// Converts the HTML in `input` to XHTML with libtidy, in place.
	//
	// When tidy produces output, `input` is replaced by it. When tidy writes
	// nothing, `input` is left unchanged.
	void tidy(std::string &input)
	{
		TidyBuffer output = {0};
		TidyBuffer errbuf = {0};
		TidyDoc tdoc = tidyCreate();
		tidyOptSetBool(tdoc, TidyXhtmlOut, yes);
		tidySetErrorBuffer(tdoc, &errbuf);      // Capture diagnostics

		tidyParseString(tdoc, input.c_str());
		tidyCleanAndRepair(tdoc);
		tidySaveBuffer(tdoc, &output);

		// The buffer pointer is still NULL when tidy wrote no output;
		// constructing a std::string from NULL is undefined behaviour.
		if (output.bp != NULL)
			input.assign(reinterpret_cast<const char*>(output.bp));

		tidyBufFree(&output);
		tidyBufFree(&errbuf);
		tidyRelease(tdoc);
	}
示例#14
0
bool HTidyInterface::formatSource( const char* textIn, CString &strTidy, CString &strMsg )
{
	TidyBuffer output;
	TidyBuffer errbuf;	
	int rc = -1;
	Bool ok = yes;

	TidyDoc tdoc = tidyCreate();                     // Initialize "document"
	tidyBufInit(&output);
	tidyBufInit(&errbuf);

	InitTidyDefault(tdoc);
	SetTidyConfig(tdoc);

	if ( ok )
		rc = tidySetErrorBuffer(tdoc, &errbuf);      // Capture diagnostics
	if ( rc >= 0 )
		rc = tidyParseString(tdoc, textIn);           // Parse the input
	if ( rc >= 0 )
		rc = tidyCleanAndRepair(tdoc);               // Tidy it up!
	if ( rc >= 0 )
		rc = tidyRunDiagnostics(tdoc);               // Kvetch
	//if ( rc > 1 )                                    // If error, force output.
	//	rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
	if ( rc >= 0 )
		rc = tidySaveBuffer(tdoc, &output);          // Pretty Print

	if ( rc >= 0 )
	{		
		strTidy = reinterpret_cast< char const* >(output.bp);
	}

	strMsg = reinterpret_cast< char const* >(errbuf.bp);
	CString strEmpty = _T("No warnings or errors were found.\r\n\r\n");
	if (0 == strEmpty.Compare(strMsg))
	{
		strMsg.Empty();
	}
	tidyBufFree(&output);
	tidyBufFree(&errbuf);
	tidyRelease(tdoc);
	return true;
}
示例#15
0
// -------------------------------------------------------------
void Webpage::tidy_me()
{
	try {
		TidyDoc _tdoc = tidyCreate();
		//tidyOptSetBool(_tdoc, tidyOptGetIdForName("show-body-only"), (Bool)1);
		tidyOptSetBool(_tdoc, tidyOptGetIdForName("output-xhtml"), (Bool)1);
		tidyOptSetBool(_tdoc, tidyOptGetIdForName("quote-nbsp"), (Bool)0);
		tidyOptSetBool(_tdoc, tidyOptGetIdForName("show-warnings"), (Bool)0);
		tidyOptSetValue(_tdoc, tidyOptGetIdForName("char-encoding"), "utf8");
		//tidyOptSetBool(_tdoc, tidyOptGetIdForName("ascii-chars"), (Bool)1);
		//tidyOptSetBool(_tdoc, tidyOptGetIdForName("markup"), (Bool)1);
		//tidyOptSetValue(_tdoc, tidyOptGetIdForName("indent"), "yes");
		//tidyOptSetValue(_tdoc, tidyOptGetIdForName("newline"), "\n");
		tidyOptSetInt(_tdoc, tidyOptGetIdForName("wrap"), 5000);
		tidyParseString( _tdoc, contents.c_str() );
	
		/*
		// tidySaveBuffer doesn't seem to work with the makefile for some reason.
		TidyBuffer output = {0};
		tidySaveBuffer(_tdoc, &output);
		cout << "3. TidyBuffer size: " << output.size << endl;
		contents = string((char*)output.bp, (size_t)output.size);
		 */
		
		// tidySaveString is a tricky beast.
		tmbstr buffer = NULL;
		uint buflen = 0;
		int status;
		do {
			status = tidySaveString( _tdoc, buffer, &buflen );
			if (status == -ENOMEM) {
				if(buffer) 
					free(buffer);
				buffer = (tmbstr)malloc(buflen + 1);
			}
		} while (status == -ENOMEM);
		contents = (char*)buffer;

	} catch (exception& e) {
		throw e.what();
	}
}
示例#16
0
// Minimal libtidy demo: converts a fixed HTML snippet to XHTML and prints
// the diagnostics and the result.
//
// Returns the status of the last tidy step that ran: negative on a severe
// error, otherwise the value returned by tidySaveBuffer().
int main(int argc, char **argv )
{
    const char* input = "<title>Hello</title><p>World!";
    TidyBuffer output = {0};
    TidyBuffer errbuf = {0};
    int rc = -1;

    TidyDoc tdoc = tidyCreate();
    printf( "Tidying:\t%s\n", input );

    // Each step runs only if the previous one did not fail.
    if ( tidyOptSetBool( tdoc, TidyXhtmlOut, yes ) )   // ask for XHTML output
        rc = tidySetErrorBuffer( tdoc, &errbuf );      // capture diagnostics
    if ( rc >= 0 )
        rc = tidyParseString( tdoc, input );
    if ( rc >= 0 )
        rc = tidyCleanAndRepair( tdoc );
    if ( rc >= 0 )
        rc = tidyRunDiagnostics( tdoc );

    // A status above 1 means errors were reported: force output anyway, and
    // treat a failure to set that option as fatal.
    if ( rc > 1 && !tidyOptSetBool(tdoc, TidyForceOutput, yes) )
        rc = -1;

    if ( rc >= 0 )
        rc = tidySaveBuffer( tdoc, &output );

    if ( rc < 0 )
    {
        printf( "A severe error (%d) occurred.\n", rc );
    }
    else
    {
        if ( rc > 0 )
            printf( "\nDiagnostics:\n\n%s", errbuf.bp );
        printf( "\nAnd here is the result:\n\n%s", output.bp );
    }

    tidyBufFree( &output );
    tidyBufFree( &errbuf );
    tidyRelease( tdoc );
    return rc;
}
示例#17
0
文件: tdoc.c 项目: nuxlli/wax
// quick and dirty shortcut function.
// Lua binding: runs the whole tidy pipeline (parse, clean and repair,
// diagnostics, save) on the string at stack index 2, using the tidy document
// of the object at stack index 1.
//
// Pushes three results: the output buffer contents, the diagnostics text
// (nil when the final status is 0), and the final status code.
int lua_tidy_easyClean ( lua_State *L )
{
    TidyBuffer cleaned;
    TidyBuffer diagnostics;
    pTidy self;
    const char * html;
    int status;

    tidyBufInit(&cleaned);
    tidyBufInit(&diagnostics);

    self = toTidy(L,1);
    html = lua_tostring(L,2);

    /* Each step runs only if the previous one did not fail. */
    status = tidySetErrorBuffer( self->tdoc, &diagnostics );
    if ( status >= 0 )
        status = tidyParseString( self->tdoc, html );
    if ( status >= 0 )
        status = tidyCleanAndRepair( self->tdoc );
    if ( status >= 0 )
        status = tidyRunDiagnostics( self->tdoc );
    if ( status >= 0 )
        status = tidySaveBuffer( self->tdoc, &cleaned );

    /* Result 1: whatever tidy wrote to the output buffer. */
    lua_pushlstring(L, (char*)cleaned.bp, cleaned.size);

    /* Result 2: diagnostics, or nil when the status is exactly 0. */
    if ( status == 0 )
        lua_pushnil(L);
    else
        lua_pushlstring(L, (char*)diagnostics.bp, diagnostics.size);

    /* Result 3: the status code itself. */
    lua_pushnumber(L, status);

    tidyBufFree( &cleaned );
    tidyBufFree( &diagnostics );

    return 3;
}
示例#18
0
/*
 * Parse `html` with tidy (forced output, wrap at 4096 columns) and, when
 * parsing, clean-and-repair and diagnostics all succeed, pass the document's
 * html node to html_find_objects() together with `objs`.
 *
 * NOTE(review): neither the tidy document nor the error buffer is released
 * here. Whether that is safe to add depends on whether html_find_objects()
 * keeps references to tidy nodes in `objs` - confirm before adding
 * tidyBufFree()/tidyRelease().
 */
void html_parse(const gchar* html, GSList** objs) {
	TidyDoc tdoc = tidyCreate();
	TidyBuffer tidy_errbuf = {0};
	int status;

	tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */
	tidyOptSetInt(tdoc, TidyWrapLen, 4096);
	tidySetErrorBuffer( tdoc, &tidy_errbuf );

	/* Stop at the first step that reports a negative status. */
	status = tidyParseString(tdoc, html); /* parse the input */
	if ( status < 0 )
		return;

	status = tidyCleanAndRepair(tdoc); /* fix any problems */
	if ( status < 0 )
		return;

	status = tidyRunDiagnostics(tdoc); /* load tidy error buffer */
	if ( status < 0 )
		return;

	html_find_objects(tidyGetHtml(tdoc), objs); /* walk the tree */
}
示例#19
0
/*
 * Python entry point: tidy a string.
 *
 * Arguments (from the "s#|O" format): the text to tidy and an optional
 * dictionary that maps tidy option names to values. A TypeError is raised
 * when the second argument is not a dictionary, and a KeyError when an
 * option name is unknown to tidy.
 *
 * Returns the contents of tidy's output buffer as a Python string, built
 * with Py_BuildValue("s#", ...).
 *
 * NOTE(review): TDOC_RETURN and PY_TO_TIDY are macros defined outside this
 * function; their control flow and cleanup are not visible here.
 */
static PyObject *parseString(PyObject *self, PyObject *args)
{
    char *cp;
    int i, len, list_size;
    TidyDoc tdoc;
    TidyOption option = TidyUnknownOption;
    PyObject *res = NULL, *arglist = NULL;
    PyObject *key_list = NULL, *item = NULL, *value = NULL;
    TidyBuffer output = {0};
    TidyBuffer errbuf = {0};

    /* "len" is filled in here, but only "cp" is handed to tidy below. */
    if (!PyArg_ParseTuple(args, "s#|O", &cp, &len, &arglist))
        return NULL;

    if (arglist && !PyDict_Check(arglist))
    {
        PyErr_SetString(PyExc_TypeError, "Second argument must be a dictionary!");
        return NULL;
    }

    tdoc = tidyCreate();
    tidySetErrorBuffer(tdoc, &errbuf);

    if (!arglist) goto im_so_lazy; /* no args provided */

    /* NOTE(review): PyDict_Keys returns a new reference and no
       Py_DECREF(key_list) is visible in this function - check whether it
       is released elsewhere (for example by TDOC_RETURN). */
    key_list = PyDict_Keys(arglist);
    list_size = PyList_Size(key_list);

    /* Apply every option in the dictionary to the tidy document. */
    for (i = 0; i < list_size; i++)
    {
        item = PyList_GetItem(key_list, i);
        value = PyDict_GetItem(arglist, item);
        /* Hold our own references to key and value for this iteration;
           they are dropped again at the bottom of the loop body. */
        Py_INCREF(item);
        Py_INCREF(value);

        option = tidyGetOptionByName(tdoc, PyString_AsString(item));

        if (option == TidyUnknownOption)
        {
            PyErr_Format(PyExc_KeyError, "Unknown tidy option '%s'", PyString_AsString(item));
            TDOC_RETURN();
        }

        /* Dispatch on the option's declared type. */
        switch (tidyOptGetType(option))
        {
            case TidyString:
                PY_TO_TIDY(String_Check, Value, String_AsString, "a String");
                break;
            case TidyInteger:
                PY_TO_TIDY(Int_Check, Int, Int_AsLong, "an Integer");
                break;
            case TidyBoolean:
                PY_TO_TIDY(Int_Check, Bool, Int_AsLong, "a Boolean or an Integer");
                break;
            default:
            {
                PyErr_Format(PyExc_RuntimeError,
                             "Something strange happened, there is no option type %d",
                             tidyOptGetType(option));
                TDOC_RETURN();
            }
        }
        Py_DECREF(item);
        Py_DECREF(value);
    }

 im_so_lazy:
    /* Parse, repair and serialize; the status codes are not inspected. */
    tidyParseString(tdoc, cp);
    tidyCleanAndRepair(tdoc);
    tidySaveBuffer(tdoc, &output);

    /* Build the Python result before tidy's buffers are freed. */
    res = Py_BuildValue("s#", output.bp, output.size);
    tidyBufFree(&output);
    tidyBufFree(&errbuf);
    tidyRelease(tdoc);
    return res;
}
示例#20
0
/*
 * Python entry point "fixup(text[, encoding])": run `text` through tidy and
 * return a 2-tuple (xhtml, diagnostics) of Python strings.
 *
 * When `encoding` is given it is used for both input and output; otherwise
 * tidy's default input encoding is used with UTF-8 output.
 *
 * Returns NULL with an IOError set when a tidy step fails, or NULL with the
 * pending Python error when building a result object fails.
 */
static PyObject*
elementtidy_fixup(PyObject* self, PyObject* args)
{
    int rc;
    TidyDoc doc;
    TidyBuffer out = {0};
    TidyBuffer err = {0};
    PyObject* pyout;
    PyObject* pyerr;
    PyObject* result = NULL;

    char* text;
    char* encoding = NULL;
    if (!PyArg_ParseTuple(args, "s|s:fixup", &text, &encoding))
        return NULL;

    doc = tidyCreate();

    /* options for nice XHTML output */
    if (encoding)
        /* if an encoding is given, use it for both input and output */
        tidyOptSetValue(doc, TidyCharEncoding, encoding);
    else
        /* if no encoding is given, use default input and utf-8 output */
        tidyOptSetValue(doc, TidyOutCharEncoding, "utf8");
    tidyOptSetBool(doc, TidyForceOutput, yes);
    tidyOptSetInt(doc, TidyWrapLen, 0);
    tidyOptSetBool(doc, TidyQuiet, yes);
    tidyOptSetBool(doc, TidyXhtmlOut, yes);
    tidyOptSetBool(doc, TidyXmlDecl, yes);
    tidyOptSetInt(doc, TidyIndentContent, 0);
    tidyOptSetBool(doc, TidyNumEntities, yes);

    rc = tidySetErrorBuffer(doc, &err);
    if (rc < 0) {
        PyErr_SetString(PyExc_IOError, "tidySetErrorBuffer failed");
        goto done;
    }

    rc = tidyParseString(doc, text);
    if (rc < 0) {
        PyErr_SetString(PyExc_IOError, "tidyParseString failed");
        goto done;
    }

    rc = tidyCleanAndRepair(doc);
    if (rc < 0) {
        PyErr_SetString(PyExc_IOError, "tidyCleanAndRepair failed");
        goto done;
    }

    rc = tidyRunDiagnostics(doc);
    if (rc < 0) {
        PyErr_SetString(PyExc_IOError, "tidyRunDiagnostics failed");
        goto done;
    }

    rc = tidySaveBuffer(doc, &out);
    if (rc < 0) {
        /* The original reported "tidyRunDiagnostics failed" here. */
        PyErr_SetString(PyExc_IOError, "tidySaveBuffer failed");
        goto done;
    }

    /* A buffer pointer is NULL while nothing has been written to it. The
       casts bridge tidy's byte pointer and the char pointer Python expects. */
    pyout = PyString_FromString(out.bp ? (const char*) out.bp : "");
    if (!pyout)
        goto done;
    pyerr = PyString_FromString(err.bp ? (const char*) err.bp : "");
    if (!pyerr) {
        Py_DECREF(pyout);
        goto done;
    }

    /* "N" hands both references over to the tuple. */
    result = Py_BuildValue("NN", pyout, pyerr);

  done:
    /* Single exit: result is still NULL on every failure path. */
    tidyBufFree(&out);
    tidyBufFree(&err);

    tidyRelease(doc);

    return result;
}
示例#21
0
// Converts the HTML in `content` to XHTML with libtidy, writes the result to
// "<cwd>/data/_tmp.xml", then loads that file with TinyXML and walks it with
// a CTiXmlProxyVistor that collects into m_array.
//
// Returns 0 on success, -1 when tidy fails, and -2 when the XML file cannot
// be loaded or has no root element.
int CProxyParse::RunFromMem( wxString content )
{
	wxString data_path = wxGetCwd() + "/data/";
	wxString path1 = data_path + "_tmp.xml";

	if (!wxDirExists(data_path))
		wxMkdir(data_path);

	// Tidy is told below that its input is UTF-8, so give it UTF-8. The
	// original copied the string into a buffer sized by character count,
	// which could truncate non-ASCII text and leave it unterminated.
	const wxCharBuffer utf8 = content.utf8_str();
	const char *pHtml = utf8.data();
	if (pHtml == NULL)
		pHtml = "";

	wxLogMessage("Run Tidy!");
	TidyBuffer output;
	TidyBuffer errbuf;
	int rc = -1;
	TidyDoc tdoc = tidyCreate();                     // Initialize "document"
	tidyBufInit( &output );
	tidyBufInit( &errbuf );
	tidySetCharEncoding(tdoc, "utf8");
	Bool ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes );  // Convert to XHTML
	if ( ok )
		rc = tidySetErrorBuffer( tdoc, &errbuf );      // Capture diagnostics
	if ( rc >= 0 )
		rc = tidyParseString( tdoc, pHtml );           // Parse the input
	if ( rc >= 0 )
		rc = tidyCleanAndRepair( tdoc );               // Tidy it up!
	if ( rc >= 0 )
		rc = tidyRunDiagnostics( tdoc );               // Kvetch
	if ( rc > 1 )                                    // If error, force output.
		rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
	if ( rc >= 0 )
		rc = tidySaveBuffer( tdoc, &output );          // Pretty Print

	if ( rc >= 0 )
	{
		// The XML loader below reads this file, so it must be written in
		// every build; the original only wrote it when _DEBUG was defined.
		WriteAllToFile(path1, (char*)output.bp, output.size);
	}
	else
		wxLogError("tidyFail");

	tidyBufFree( &output );
	tidyBufFree( &errbuf );
	tidyRelease( tdoc );

	// Do not go on to parse a file left over from an earlier run.
	if ( rc < 0 )
		return -1;

	wxLogMessage("Fetch data!");
	// Parse the data
	TiXmlDocument doc(path1);
	if (!doc.LoadFile())
	{
		wxLogMessage("shit");
		return -2;
	}

	TiXmlElement *pRoot = doc.RootElement();
	if (pRoot == NULL)
		return -2;		// no root element to visit

	CTiXmlProxyVistor vistor(&m_array);
	pRoot->Accept(&vistor);
	return 0;
}
示例#22
0
	/*!
	* \fn static int TidyHtml(const char *pcSourcePage, string &sDestPage);
	* \brief  修补丢失、错误标签
	* \param  [in]待修补网页字符串
	* \param  [out]修补后的网页string
	* \return 结果码,==0修补正确,<0修补失败
	* \date   2011-06-01 
	* \author nanjunxiao
	*/
	int Pretreat::TidyHtml(const char *pcSourcePage, std::string &sDestPage)
	{
		int iReturn = 0;
		TidyBuffer errbuf = {0};
		TidyDoc tdoc;
		tmbstr pBuffer = NULL;

		try
		{
			if ( (pcSourcePage == NULL) || (strlen(pcSourcePage) ==0 ) )
			{
				//cerr << "TidyHtml 输入页面为空!" << endl;
				throw (-1);
			}

			int iRet = -1;
			Bool bOk;
			uint uiBufLen;
			int iBufSize;
			tdoc = tidyCreate();// Initialize "document"
			bOk = tidyOptSetBool(tdoc, TidyXhtmlOut, yes);// Convert to XHTML
			if (bOk)
			{
				iRet = tidySetErrorBuffer(tdoc, &errbuf); // Capture diagnostics
			}
			else
			{
				throw (-1);
			}

			if (iRet >= 0)
			{
				iRet = tidySetCharEncoding(tdoc,"utf8"); //Ensure dealing with gb2312 successfully
			}
			else
			{
				throw (-1);
			}

			if (iRet >= 0)
			{
				string htmlsrc = pcSourcePage;
				iRet = tidyParseString (tdoc, htmlsrc.c_str() ); // Parse the input
			}
			else
			{
				throw (-1);
			}

			if (iRet >= 0)
			{
				iRet = tidyCleanAndRepair(tdoc); //Tidy it up!
			}
			else
			{
				throw (-1);
			}
			
			if (iRet >= 0)
			{
				iRet = tidyRunDiagnostics(tdoc); //Kvetch
			}
			else
			{
				throw (-1);
			}

			if(iRet > 1) // If error, force output.
			{
				iRet = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? iRet : -1 );
			}
			else if (iRet < 0)
			{
				throw (-1);
			}

			if (iRet >= 0)
			{
				// Pretty Print
				iBufSize = 1024 * 1024 * 5;
				uiBufLen = iBufSize;
				pBuffer = new char [iBufSize];
				memset(pBuffer, '\0', iBufSize);
				iRet = tidySaveString(tdoc, pBuffer, &uiBufLen);
			}
			else
			{
				throw (-1);
			}

			if (iRet >= 0)
			{
				sDestPage = pBuffer;
			}
			else if (iRet == -ENOMEM)
			{
				//pBuffer 长度不够
				//cerr << "TidyHtml pBuffer长度不够!" << endl;
				throw (-1);
			}
			else
			{
				throw (-1);
			}
		}
		catch(exception &err)
		{
			//cerr << "TidyHtml HtmlTidy修补页面失败! " << err.what() << endl;
			iReturn = -1;
		}
		catch(int iThrow)
		{
			if (iThrow < 0)
			{
				//cerr << "TidyHtml HtmlTidy修补页面失败!" << endl;
			}
			iReturn = iThrow;
		}
		catch(...)
		{
			//cerr << "TidyHtml HtmlTidy修补页面失败!" << endl;
			iReturn = -1;
		}

		tidyBufFree(&errbuf);
		tidyRelease(tdoc);
		if (pBuffer != NULL)
		{
			delete [] pBuffer;
			pBuffer = NULL;
		}
		return iReturn;
	}
示例#23
0
/*
 * Parse an Air China (CA) flight-list HTML page and merge the flights it
 * contains into listFlight.
 *
 * The page is loaded into an HTML Tidy document (encoding "raw"), the node
 * whose class attribute is "CA_table mt_10 clear" is located with FindNode(),
 * and every child row of that node except the first (header) row is read cell
 * by cell:
 *   cell 0 - validity of the row (__IsFlightValid)
 *   cell 1 - departure date / flight number
 *   cell 2 - departure time
 *   cell 3 - airport codes (only read when __IsTwoAirPort() says so)
 *   cell 4 - sale end date / time
 *   cell 5 - price and remaining tickets
 *
 * If __findCaFlight() reports that the flight is already in listFlight, the
 * existing entry may be updated in place and the row is otherwise dropped.
 * Otherwise, when the row is valid, a new SCaLowPriceFlightDetail is allocated
 * with `new` and appended to listFlight (the caller owns and frees it).
 *
 * Parameters:
 *   listFlight          in/out list of flights; entries are updated/appended.
 *   strHtmlData         the HTML text to parse.
 *   straDCode           departure code used as the default departure airport.
 *   straACode           arrival code used as the default arrival airport.
 *   pLowPriceFlightInfo supplies iProductType / iProductId for new entries;
 *                       dereferenced without a null check.
 *
 * Returns: always -1 (the literal -1.0 converted to int), regardless of
 * whether anything was parsed.
 * NOTE(review): the constant return looks unintentional - confirm what callers
 * expect before changing it.
 * NOTE(review): several TRACE calls pass CStringA objects to "%s" through
 * varargs without an explicit (LPCSTR) cast - verify this is intended.
 */
int CCaHtmlParse::ParseCaHtmlFlights(std::list<SCaLowPriceFlightDetail*> & listFlight, const std::string& strHtmlData, const CStringA & straDCode, const CStringA & straACode, const SCaLowPriceFlightInfo*	pLowPriceFlightInfo)
{
	// Build the tidy document; the return value of tidyParseString is not checked.
	TidyDoc doc = tidyCreate();
	tidySetCharEncoding(doc,"raw");
	tidyParseString(doc,strHtmlData.c_str());
	TidyNode tnRoot = tidyGetRoot(doc);

	TidyNode tFlightTab;
	TidyNode tdChild;
	int nIndexTd = 0;

	// "Now" is sampled once and used for every row's sale-deadline computation.
	CTime tCurrent = CTime::GetCurrentTime();
	SCaLowPriceFlightDetail *pfindFlight = NULL;
	if (FindNode(tnRoot,"class","CA_table mt_10 clear",tFlightTab))
	{
		// Loop over the children of the table node; each child is one record.
		// (The original comment spoke of settlement prices under "tblPolicy".)
		TidyNode trFlight;
		int nIndexTr = 0;
		BOOL bValid = FALSE;
		CStringA straDPortCode = straDCode;
		CStringA straAPortCode = straACode;
		CStringA straFlightNo("");
		CStringA straFlightStartDate("");
		CStringA straSaleEndDate("");
		CStringA straSaleEndTime("");
		CStringA straFlightStartTime("");

		UINT uPrice = 0;
		UINT uRemainTicket = 0;
		for ( trFlight = tidyGetChild(tFlightTab); trFlight; trFlight = tidyGetNext(trFlight) )
		{
			if (0 == nIndexTr)// skip the table header row
			{
				nIndexTr++;
				continue;
			}

			// Reset the per-row state. The airport codes are NOT reset here, so
			// they carry over from the previous row unless cell 3 overwrites them.
			nIndexTd = 0;
			bValid = FALSE;
			straFlightNo = "";
			straFlightStartDate = "";
			straSaleEndDate = "";
			straSaleEndTime = "";
			straFlightStartTime = "";
			uPrice = 0;
			uRemainTicket = 0;
			for ( tdChild = tidyGetChild(trFlight); tdChild; tdChild = tidyGetNext(tdChild) )
			{
				// Dispatch on the cell's position within the row; cells past index 5 are ignored.
				switch(nIndexTd)
				{
				case 0:
					{
						// Selection column: is the row disabled?
						bValid = __IsFlightValid(tdChild);
						TRACE(_T("Flight valid:%d-"), bValid);
						
					}
					break;
				case 1:
					{
						// Date / flight number
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						__GetFlightNoAndFlightStartDate(straFlightNo, straFlightStartDate, doc, tdChild);
						TRACE("date:%s, no:%s-", straFlightStartDate, straFlightNo);
						 //TRACE("%s\r\n", GetNodeContent(doc, tdChild));
					}
					break;
				case 2:
					{
						// Departure / arrival time
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						 //TRACE("%s\r\n", GetNodeContent(doc, tdChild));
						__GetFlightStartTime(straFlightStartTime, doc, tdChild);
					}
					break;
				case 3:
					{
						// Airport
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						 //TRACE("%s\r\n", GetNodeContent(doc, tdChild));
						if (__IsTwoAirPort(straDCode, straACode))
						{
							// Fall back to the caller-supplied codes when the cell yields nothing.
							__GetAirPortCode(straDPortCode, straAPortCode, doc, tdChild);
							if(straDPortCode.IsEmpty())
								straDPortCode = straDCode;
							if(straAPortCode.IsEmpty())
								straAPortCode = straACode;
							TRACE("%s->%s-", straDPortCode, straAPortCode);
						}

					}
					break;
				case 4:
					{
						// Sale end date and time
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						//TRACE("%s\r\n", GetNodeContent(doc, tdChild));
						__GetSaleEndDate(straSaleEndDate, straSaleEndTime, doc, tdChild);
						TRACE("sale end date:%s, %s-", straSaleEndDate, straSaleEndTime);
					}
					break;
				case 5:
					{
						// Group-buy price
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						//TRACE("%s\r\n", GetNodeContent(doc, tdChild));
						//CStringA straSetPrice = GetNodeContent(doc, tdChild);

						//double fSetPrice = atof(straSetPrice.GetBuffer(0));
						//straSetPrice.ReleaseBuffer();
						//tidyRelease(doc);
						//return fSetPrice;
						__GetPriceAndRamainTicket(&uPrice, &uRemainTicket, doc, tdChild);
						TRACE("price:%d, remain %d seats", uPrice, uRemainTicket);
					}
					break;
				}

				nIndexTd++;
			}
			TRACE(_T("\r\n"));

			// Flights after the cut-off date are not fetched (that check is currently commented out below).
			// Get the departure date; 2014-12-12 is the value used if GetYearMonthDay leaves the outputs untouched.
			int nFlightStartYear = 2014;
			int nFlightStartMonth = 12;
			int nFlightStartDay = 12;
			GetYearMonthDay(straFlightStartDate, &nFlightStartYear, &nFlightStartMonth, &nFlightStartDay);
			
			// tStart is only referenced by the commented-out cut-off check.
			CTime tStart(nFlightStartYear, nFlightStartMonth, nFlightStartDay, 0, 0, 0);
			//if (!m_bGetAllCaTuanFlight)
			//{
			//	if (tStart > m_tGetEndTime)
			//		continue;
			//}
			//
			
			//double d6 = pLowPriceFlightInfo->iMinHangPrice * 0.6;
			//UINT u6 = (UINT)d6;
			////Ordinary group-buy fares above 60% of full price incur refund/change fees (low-price applications are not restricted), so they are not listed
			//if (uPrice > d6 && CA_TUAN_PRODUCT == pLowPriceFlightInfo->iProductType)
			//{
			//	bValid = FALSE;
			//	uRemainTicket = 0;
			//	continue;
			//}
			// For flights with the same date, time and flight number keep only the lowest price.
			// NOTE(review): this merge path runs even when bValid is FALSE - confirm that is intended.
			BOOL bFind = __findCaFlight(&pfindFlight, straFlightStartDate, straDPortCode, straAPortCode, straFlightNo, listFlight);
			if (bFind)
			{
				int nCurPrice = (int)uPrice;
				// The row just parsed is cheaper than the one stored earlier.
				if(pfindFlight->nPrice > nCurPrice)
				{
					if (uRemainTicket > m_nMinTicketWarnNum)
					{
						// The current row has enough tickets: overwrite the stored seat count and price with it.
						pfindFlight->nRemainSeat = uRemainTicket;
						pfindFlight->nPrice = nCurPrice;
						pfindFlight = NULL;
					}
				}
				else //(pfindFlight->nPrice <= nCurPrice)
				{
					// The stored entry is not more expensive, but it is at or below the
					// ticket warning threshold, so the current row replaces it.
					if(pfindFlight->nRemainSeat <= m_nMinTicketWarnNum)
					{
						pfindFlight->nRemainSeat = uRemainTicket;
						pfindFlight->nPrice = nCurPrice;
						pfindFlight = NULL;
					}
				}

				// An already-known flight never produces a new list entry.
				continue;
			}

			// Store the parsed flight; the caller is responsible for freeing the memory.
			if (bValid)
			{
				SCaLowPriceFlightDetail* pDetail = new SCaLowPriceFlightDetail;
				pDetail->straCompany = "CA";	
				pDetail->straFromCityCode = straDPortCode;	
				pDetail->straToCityCode = straAPortCode;	
				pDetail->straFlightNo = straFlightNo;		
				pDetail->straFromDate = straFlightStartDate;	
				// Ctrip orders take some time to arrive and Air China closes at 16:00, so for
				// same-day tickets and tickets before 12:00 the next day the sale end time is moved up by 30 minutes.
				// Compute the interval until the sale ends.
				int nSaleEndYear = 2014;
				int nSaleEndMonth = 12;
				int nSaleEndDay = 12;
				GetYearMonthDay(straSaleEndDate, &nSaleEndYear, &nSaleEndMonth, &nSaleEndDay);
				int nSaleEndHour = 12;
				int nSaleEndMin = 0;
				GetHourMinSec(straSaleEndTime, &nSaleEndHour, &nSaleEndMin);
				CTime tSaleEndDate(nSaleEndYear, nSaleEndMonth, nSaleEndDay, nSaleEndHour, nSaleEndMin, 0);
				CTimeSpan tSpan = tSaleEndDate - tCurrent;
				// end: sale interval
				// Get the departure time.
				int nFlightStartHour = 12;
				int nFlightStartMin = 0;
				GetHourMinSec(straFlightStartTime, &nFlightStartHour, &nFlightStartMin);
				CTime tFlightStartTime(nFlightStartYear, nFlightStartMonth, nFlightStartDay, nFlightStartHour, nFlightStartMin, 0);
				// tTimeKey is 12:00 on the departure day, the boundary used below.
				CTime tTimeKey(nFlightStartYear, nFlightStartMonth, nFlightStartDay, 12, 0, 0);
				// end: departure time
	
				// For today/tomorrow flights that depart before 12:00 and are low-price applications, the sale
				// ends 30 minutes before the official site's sale end on the previous day.
				// Note that the day test below is made on tSpan (sale end minus now), not on the departure date.
				if ((CA_TUAN_LOW_PRICE_APPLY_PRODUT == pLowPriceFlightInfo->iProductType) && (1 == tSpan.GetDays()))// tomorrow's low-price application
				{	
					if(tFlightStartTime <= tTimeKey)// departs by 12:00 tomorrow: valid until 15:25 today (Air China closes at 16:00)
					{
						pDetail->straSaleEndDate.Format("%d-%02d-%02d", tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay());
						CTime tSaleEnd(tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay(), 15, 25, 0);
						pDetail->straSaleEndTime.Format("%02d:%02d:%02d", tSaleEnd.GetHour(), tSaleEnd.GetMinute(), 0);	
					}
					else// departs after 12:00 tomorrow: the ticket can still be issued tomorrow morning
					{
						pDetail->straSaleEndDate = straSaleEndDate;
						pDetail->straSaleEndTime.Format("%02d:%02d:%02d", nSaleEndHour, nSaleEndMin, 0);
					}
				}
				else if ((CA_TUAN_LOW_PRICE_APPLY_PRODUT == pLowPriceFlightInfo->iProductType) && (tSpan.GetDays() < 1))// today's low-price application: valid until 15:30 today (Air China closes at 16:00)
				{
					pDetail->straSaleEndDate.Format("%d-%02d-%02d", tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay());
					CTime tSaleEnd(tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay(), 15, 30, 0);
					pDetail->straSaleEndTime.Format("%02d:%02d:%02d", tSaleEnd.GetHour(), tSaleEnd.GetMinute(), 0);	
				}
				else// ordinary group-buy, and low-price applications for the day after tomorrow or later
				{
					pDetail->straSaleEndDate = straSaleEndDate;
					pDetail->straSaleEndTime.Format("%02d:%02d:%02d", nSaleEndHour, nSaleEndMin, 0);
				}

				// Once the policy's sale deadline has passed the policy is to be removed:
				// re-parse the deadline just stored and, if it is not in the future, report zero seats.
				GetYearMonthDay(pDetail->straSaleEndDate,  &nSaleEndYear, &nSaleEndMonth, &nSaleEndDay);
				int nSaleEndSec = 0;
				GetHourMinSec(pDetail->straSaleEndTime, &nSaleEndHour, &nSaleEndMin, &nSaleEndSec);
				CTime tPolicyDeleteTime(nSaleEndYear, nSaleEndMonth, nSaleEndDay, nSaleEndHour, nSaleEndMin, nSaleEndSec);
				if (tCurrent >= tPolicyDeleteTime)
					uRemainTicket = 0;

				pDetail->nPrice = uPrice;				
				pDetail->nProductId = pLowPriceFlightInfo->iProductId;			
				pDetail->nRemainSeat = uRemainTicket;	
				pDetail->nProductType = pLowPriceFlightInfo->iProductType;

				listFlight.push_back(pDetail);
			}
		}
	}

	tidyRelease(doc);	

	// Constant result: the double literal is converted to int -1.
	return -1.0;
}
示例#24
0
    /**
     * Handle one fetch request.
     *
     * Downloads message.fi->url with libcurl, transcodes the body to UTF-8 with
     * ICU when the request's attribute map has an "encode" entry (its value is
     * used as the source converter name), normalises the document to XML with
     * HTML Tidy and sends a FetchResultMessage back to `from`.
     *
     * The FetchInfo pointed to by message.fi is copied and then deleted here.
     * The FetchResult is allocated with `new` and handed to FetchResultMessage.
     *
     * On success result->result holds the tidied document and result->type stays
     * fetch::UNKNOWN. On failure result->type is fetch::ERROR and result->result
     * holds the error code as decimal text:
     *   -1  curl_easy_init() failed
     *   -2  curl_easy_perform() failed
     *   -3  tidyParseString() failed
     *   -4  tidyCleanAndRepair() failed
     *
     * @param message  request; message.fi must point to a valid FetchInfo.
     * @param from     address the result message is sent to.
     */
    void FetchTaskHandler(const FetchTaskMessage &message, const Theron::Address from)
        {
            std::cout<<"get data.................."<<std::endl;

            // Take a private copy of the request, then release the sender's object.
            fetch::FetchInfo fi=*(message.fi);
            delete message.fi;

            fetch::FetchResult *result=new fetch::FetchResult();
            result->type=fetch::UNKNOWN;
            result->url=fi.url;
            result->pathList=fi.pathList;
            result->attMap=fi.attMap;

            int errCode=0;
            CURL *curl = curl_easy_init();
            if(curl!=NULL) {
                // Both must stay alive until curl_easy_cleanup() below.
                char curl_errbuf[CURL_ERROR_SIZE];
                curl_errbuf[0]='\0';
                std::stringstream iss;

                curl_easy_setopt(curl, CURLOPT_URL,fi.url.c_str());
                curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf);
                // Numeric options are read through varargs as `long`, so the
                // literals must be long as well.
                curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10L);
                curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
                curl_easy_setopt(curl, CURLOPT_TIMEOUT, 60L);
                curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fetch_write);
                curl_easy_setopt(curl, CURLOPT_WRITEDATA, &iss);

                int err=curl_easy_perform(curl);
                if ( !err )
                {
                    // Snapshot the body once; every str() call copies the whole buffer.
                    const std::string body=iss.str();
                    const char *html=body.c_str();

                    std::string converted;
                    std::map<std::string,std::string>::iterator efit=fi.attMap.find("encode");
                    if(efit!=fi.attMap.end())
                    {
                        // Keep the original sizing of three output bytes per input
                        // byte, plus one byte so the buffer is always NUL-terminated
                        // (also covers an empty body).
                        converted.assign(body.length()*3+1, '\0');
                        UErrorCode  error = U_ZERO_ERROR;
                        ucnv_convert("UTF-8",efit->second.c_str(),&converted[0], static_cast<int32_t>(converted.size()), body.c_str(), static_cast<int32_t>(body.length()), &error );
                        // As before, a conversion failure is not reported: Tidy is
                        // given whatever was written to the zero-filled buffer.
                        html=converted.c_str();
                    }

                    TidyDoc tdoc = tidyCreate();
                    tidyOptSetInt(tdoc, TidyWrapLen, 4096);
                    tidyOptSetBool( tdoc, TidyXmlOut, yes );
                    tidyOptSetBool(tdoc, TidyQuoteNbsp, no);
                    tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */
                    tidyOptSetBool(tdoc, TidyQuiet, yes);
                    tidyOptSetBool(tdoc, TidyShowWarnings, no);
                    tidyOptSetValue(tdoc,TidyDoctype,"omit");
                    tidyOptSetBool(tdoc, TidyFixBackslash, yes);
                    tidyOptSetBool(tdoc, TidyMark, no);
                    tidySetCharEncoding(tdoc,"utf8");

                    err = tidyParseString(tdoc, html);
                    if ( err < 0 )
                    {
                        errCode=-3;
                    }
                    else
                    {
                        err = tidyCleanAndRepair(tdoc); /* fix any problems */
                        if ( err < 0 )
                        {
                            errCode=-4;
                        }
                        else
                        {
                            TidyBuffer outbuf = {0};
                            tidyBufInit(&outbuf);
                            tidySaveBuffer( tdoc, &outbuf );
                            // bp stays NULL when nothing was written; never build a
                            // string from a null pointer.
                            if(outbuf.bp!=NULL)
                            {
                                result->result=reinterpret_cast<const char*>(outbuf.bp);
                            }
                            tidyBufFree(&outbuf);
                        }
                    }
                    tidyRelease(tdoc);
                }else
                {
                    errCode=-2;
                }
                curl_easy_cleanup(curl);
            }else
            {
                errCode=-1;
            }

            if(errCode<0)
            {
                std::stringstream ess;
                ess<<errCode;
                result->type=fetch::ERROR;
                result->result=ess.str();
            }
            Send(FetchResultMessage(result), from);
        }