int main(int argc, char *argv[]) { CURL *curl; int counter = 0; if (argc < 2) return 1; curl = curl_easy_init(); TidyDoc tdoc = tidyCreate(); TidyBuffer output = {0}; tidyOptSetBool(tdoc, TidyXmlOut, yes); tidyOptSetBool(tdoc, TidyShowWarnings, no); tidyOptSetInt(tdoc, TidyWrapLen, 0); for(int i=0; i < 20; i++) { // tidyBufFree(&output); tidyBufClear(&output); // tidyParseFile(tdoc, argv[1]); tidyParseString(tdoc, getpage(curl,i).c_str()); tidySaveBuffer(tdoc, &output); // tidySaveFile(tdoc, "tidy_test.xml"); // doc.LoadFile(argv[1]); // doc.LoadFile("tidy_test.xml"); parseTidyBuf(output, counter); } curl_easy_cleanup(curl); return 0; }
QString tidyHtml(QString str, bool& ok) { #ifdef NO_TIDY ok = true; return str; #else QString res = str; ok = false; static bool isTidyWithIntBodyOnly = isTidyWithIntBodyOnlyCheck(); TidyDoc tdoc = tidyCreate(); TidyBuffer output; TidyBuffer errbuf; tidyBufInit(&output); tidyBufInit(&errbuf); bool configOk = tidyOptSetBool(tdoc, TidyXhtmlOut, yes) && tidyOptSetBool(tdoc, TidyForceOutput, yes) && tidyOptSetBool(tdoc, TidyMark, no) && (isTidyWithIntBodyOnly ? tidyOptSetInt(tdoc, TidyBodyOnly, 1) : tidyOptSetBool(tdoc, TidyBodyOnly, yes)) && tidyOptSetInt(tdoc, TidyWrapLen, 0) && tidyOptSetInt(tdoc, TidyDoctypeMode, TidyDoctypeOmit); if (configOk && (tidySetCharEncoding(tdoc, "utf8") >= 0) && (tidySetErrorBuffer(tdoc, &errbuf) >= 0) && (tidyParseString(tdoc, str.toUtf8().data()) >= 0) && (tidyCleanAndRepair(tdoc) >= 0) && (tidyRunDiagnostics(tdoc) >= 0) && (tidySaveBuffer(tdoc, &output) >= 0) && (output.bp != 0 && output.size > 0)) { res = QString::fromUtf8((char*)output.bp, output.size); ok = true; } #ifdef DEBUG_MARKUP if (errbuf.size > 0) { QString errStr = QString::fromUtf8((char*)errbuf.bp, errbuf.size); qDebug() << "\n[DEBUG] MARKUP, libtidy errors and warnings:\n" << errStr; } #endif if (output.bp != 0) tidyBufFree(&output); if (errbuf.bp != 0) tidyBufFree(&errbuf); tidyRelease(tdoc); return res.trimmed(); #endif }
/**
 * Creates the tidy document and applies the fixed configuration:
 * XHTML output, forced output, no warnings, quiet mode, zero shown errors,
 * UTF-8 character encoding.
 */
tidyhtml::tidyhtml()
{
    // Boolean switches, applied in this order.
    static const struct {
        TidyOptionId id;
        Bool         value;
    } kSwitches[] = {
        { TidyXhtmlOut,     yes },
        { TidyForceOutput,  yes },
        { TidyShowWarnings, no  },
        { TidyQuiet,        yes },
    };

    tdoc = tidyCreate();

    for (unsigned idx = 0; idx < sizeof(kSwitches) / sizeof(kSwitches[0]); ++idx)
        tidyOptSetBool(tdoc, kSwitches[idx].id, kSwitches[idx].value);

    tidyOptSetInt(tdoc, TidyShowErrors, 0);
    tidySetCharEncoding(tdoc, "utf8");
}
std::string cleanHTML (std::string html) { TidyDoc tidyDoc = tidyCreate(); TidyBuffer tidyOutputBuffer = {0}; // Configure Tidy // The flags tell Tidy to output XML and disable showing warnings bool configSuccess = tidyOptSetBool(tidyDoc, TidyXmlOut, yes) && tidyOptSetBool(tidyDoc, TidyQuiet, yes) && tidyOptSetBool(tidyDoc, TidyNumEntities, yes) && tidyOptSetBool(tidyDoc, TidyShowWarnings, no); tidyOptSetValue(tidyDoc,TidyForceOutput,"true"); int tidyResponseCode = -1; // Parse input if (configSuccess) tidyResponseCode = tidyParseString(tidyDoc, html.c_str()); // Process HTML if (tidyResponseCode >= 0) tidyResponseCode = tidyCleanAndRepair(tidyDoc); // Output the HTML to our buffer if (tidyResponseCode >= 0) tidyResponseCode = tidySaveBuffer(tidyDoc, &tidyOutputBuffer); // Any errors from Tidy? if (tidyResponseCode < 0) throw ("Tidy encountered an error while parsing an HTML response. Tidy response code: " + tidyResponseCode); // Grab the result from the buffer and then free Tidy's memory std::string tidyResult = (char*)tidyOutputBuffer.bp; tidyBufFree(&tidyOutputBuffer); tidyRelease(tidyDoc); return tidyResult; }
void TidyNetworkReply::tidyUp() { QUrl redirect = reply->attribute(QNetworkRequest::RedirectionTargetAttribute).toUrl(); if (redirect.isValid()) { redirect.setScheme("tidy"); setAttribute(QNetworkRequest::RedirectionTargetAttribute, QVariant(redirect)); emit finished(); reply->deleteLater(); return; } int rc = -1; Bool ok; ok = tidyOptSetBool( tdoc, TidyXmlOut, yes ); // Convert to XHTML if (ok) ok = tidyOptSetBool(tdoc, TidyQuoteNbsp, no); //if (ok) //ok = tidyOptSetValue(tdoc, TidyBlockTags, "header,nav,article,time,section,footer"); if ( ok ) rc = tidySetErrorBuffer( tdoc, &errbuf ); // Capture diagnostics if ( rc >= 0 ) rc = tidyParseString( tdoc, reply->readAll() ); // Parse the input if ( rc >= 0 ) rc = tidyCleanAndRepair( tdoc ); // Tidy it up! if ( rc >= 0 ) rc = tidyRunDiagnostics( tdoc ); // Kvetch if ( rc > 1 ) // If error, force output. rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 ); if ( rc >= 0 ) rc = tidySaveBuffer( tdoc, &output ); // Pretty Print if ( rc >= 0 ) { if ( rc > 0 ) { ;//printf( "\nDiagnostics:\n\n%s", errbuf.bp ); } } else { ;//printf( "A severe error (%d) occurred.\n", rc ); } open(ReadOnly); emit readyRead(); emit finished(); reply->deleteLater(); //QTimer::singleShot(0, this, SIGNAL(readyRead())); //QTimer::singleShot(0, this, SIGNAL(finished())); }
// set option for the tidy object. int lua_tidy_setOpt ( lua_State *L ) { BOOL ok = FALSE; pTidy t = toTidy(L,1); if (lua_type(L,2) == LUA_TSTRING) { char *key = replace(lua_tostring(L,2), "_", "-", NULL); if (lua_type(L,3) == LUA_TBOOLEAN) { ok = tidyOptParseValue(t->tdoc, key, luatotidy_bool(L, 3) ? "1" : "0"); } else { ok = tidyOptParseValue(t->tdoc, key, lua_tostring(L, 3)); } } else { switch ( lua_type(L,3) ) { case LUA_TBOOLEAN: ok = tidyOptSetBool(t->tdoc, (int)lua_tonumber(L,2), luatotidy_bool(L, 3)); break; case LUA_TNUMBER: ok = tidyOptSetInt(t->tdoc, (int) lua_tonumber(L,2), (int) lua_tonumber(L, 3)); break; case LUA_TSTRING: ok = tidyOptSetValue(t->tdoc, (int) lua_tonumber(L,2), lua_tostring(L, 3)); break; } } lua_pushboolean(L, ok>=0); return 1; }
/*
 * Set the tidy option named by `option` to `value`.
 *
 * The option is looked up by name and the setter is chosen from the option
 * type reported by libtidy (string, integer or boolean).
 *
 * Returns html_valid_ok on success, html_valid_unknown_option when the name
 * is not known to tidy, and html_valid_bad_option_type for any other
 * option type.
 */
static html_valid_status_t
html_valid_set_option (html_valid_t * htv, SV * option, SV * value)
{
    TidyOption to;
    TidyOptionType tot;
    TidyOptionId ti;
    const char * coption;
    STRLEN coption_length;

    CHECK_INIT (htv);

    coption = SvPV (option, coption_length);
    to = tidyGetOptionByName(htv->tdoc, coption);
    if (! to) {
        warn ("unknown option %s", coption);
        return html_valid_unknown_option;
    }

    ti = tidyOptGetId (to);
    tot = tidyOptGetType (to);

    if (tot == TidyString) {
        CALL (set_string_option (htv, coption, ti, value));
    }
    else if (tot == TidyInteger) {
        CALL (set_number_option (htv, coption, ti, value));
    }
    else if (tot == TidyBoolean) {
        /* Perl truthiness decides the flag. */
        tidyOptSetBool (htv->tdoc, ti, SvTRUE (value));
    }
    else {
        fprintf (stderr, "%s:%d: bad option type %d from tidy library.\n", __FILE__, __LINE__, tot);
        return html_valid_bad_option_type;
    }

    return html_valid_ok;
}
void HTMLTidy::run() throw( std::runtime_error ) { TidyBuffer outputBuffer = { 0 }; TidyBuffer errorBuffer = { 0 }; // try to create valid XHTML document for XML parser: int tidyResult = -1; if( tidyOptSetBool( handle, TidyXhtmlOut, yes ) ) { tidyResult = tidySetErrorBuffer( handle, &errorBuffer ); } if( tidyResult >= 0 ) { tidyResult = tidyParseString( handle, document.c_str() ); } if( tidyResult >= 0 ) { tidyResult = tidyCleanAndRepair( handle ); } if( tidyResult >= 0 ) { tidyResult = tidyRunDiagnostics( handle ); } if( tidyResult > 1 ) { if( !tidyOptSetBool( handle, TidyForceOutput, yes ) ) { tidyResult = -1; } } if( tidyResult >= 0 ) { tidyResult = tidySaveBuffer( handle, &outputBuffer ); } if( tidyResult > 0 ) { std::clog << "*********************************" << std::endl; std::clog << "HTMLTidy: Diagnostics of libtidy:" << std::endl; std::clog << errorBuffer.bp; std::clog << "*********************************" << std::endl; } else if( tidyResult < 0 ) { std::stringstream sstrTidyResult; sstrTidyResult << tidyResult; throw std::runtime_error( "HTMLTidy: A severe error occured while tidying up the received document (" + sstrTidyResult.str() + ")." ); } resultDocument.reserve( outputBuffer.size ); // avoid frequent (re-)allocations for( unsigned int i = 0; i < outputBuffer.size; i++ ) { resultDocument.insert( resultDocument.end(), static_cast< char >( *(outputBuffer.bp + i) ) ); } tidyBufFree( &outputBuffer ); tidyBufFree( &errorBuffer ); }
/**
 * Creates a tidy document for `html`, configures it (XML output, UTF-8,
 * LF newlines, unquoted nbsp, forced output), routes diagnostics into
 * m_errorOutput, then parses and repairs the input right away.
 *
 * @param html  markup to process; a copy is kept in m_input
 */
HtmlTidy::HtmlTidy(const QString& html)
    : m_tidyDoc(tidyCreate()),
      m_errorOutput(),
      m_output(),
      m_input(html)
{
    // Output format
    tidyOptSetBool (m_tidyDoc, TidyXmlOut, yes);
    tidyOptSetValue(m_tidyDoc, TidyCharEncoding, "utf8");
    tidyOptSetInt  (m_tidyDoc, TidyNewline, TidyLF);
    tidyOptSetBool (m_tidyDoc, TidyQuoteNbsp, no);
    tidyOptSetBool (m_tidyDoc, TidyForceOutput, yes);

    // Diagnostics go to the member buffer instead of stderr.
    tidySetErrorBuffer(m_tidyDoc, &m_errorOutput);

    // Keep the UTF-8 bytes alive for the duration of the parse call.
    const QByteArray utf8Input = m_input.toUtf8();
    tidyParseString(m_tidyDoc, utf8Input.constData());
    tidyCleanAndRepair(m_tidyDoc);
}
/**
 * Takes HTML code and returns it converted to XHTML code.
 *
 * The tidy call sequence follows the libtidy sample by Charles Reitzel and
 * Dave Raggett (see the tidy package).
 *
 * @param input  HTML markup; handed to tidy as UTF-8
 * @return the XHTML produced by tidy, or an empty string when tidy reports
 *         a severe error (a message is then printed to stdout)
 */
QString tidy(QString input)
{
    TidyBuffer output = {0};
    TidyBuffer errbuf = {0};
    QString result;
    int rc = -1;
    Bool ok;

    TidyDoc tdoc = tidyCreate();                       // Initialize "document"
    ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes );    // Convert to XHTML
    if ( ok )
        rc = tidySetErrorBuffer( tdoc, &errbuf );      // Capture diagnostics
    tidySetCharEncoding( tdoc, "utf8" );
    if ( rc >= 0 )
        rc = tidyParseString( tdoc, input.toUtf8().constData() );  // Parse the input
    if ( rc >= 0 )
        rc = tidyCleanAndRepair( tdoc );               // Tidy it up!
    if ( rc >= 0 )
        rc = tidyRunDiagnostics( tdoc );               // Kvetch
    if ( rc > 1 )                                      // If error, force output.
        rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
    if ( rc >= 0 )
        rc = tidySaveBuffer( tdoc, &output );          // Pretty Print

    if ( rc >= 0 )
    {
        // Convert straight from tidy's buffer. The former malloc/snprintf
        // copy dropped the last byte, leaked the allocation and crashed on
        // an empty (NULL) buffer.
        if ( output.bp != NULL )
            result = QString::fromUtf8( reinterpret_cast<const char*>(output.bp),
                                        static_cast<int>(output.size) );
    }
    else
        printf( "A severe error (%d) occurred.\n", rc );

    tidyBufFree( &output );
    tidyBufFree( &errbuf );
    tidyRelease( tdoc );

    // NOTE(review): as written this replaces "ö" with itself; the first
    // literal was presumably a mis-encoded sequence once — confirm intent.
    result=result.replace("ö","ö");
    return result;
}
// ------------------------------------------------------------- void Webpage::tidy_me() { try { TidyDoc _tdoc = tidyCreate(); //tidyOptSetBool(_tdoc, tidyOptGetIdForName("show-body-only"), (Bool)1); tidyOptSetBool(_tdoc, tidyOptGetIdForName("output-xhtml"), (Bool)1); tidyOptSetBool(_tdoc, tidyOptGetIdForName("quote-nbsp"), (Bool)0); tidyOptSetBool(_tdoc, tidyOptGetIdForName("show-warnings"), (Bool)0); tidyOptSetValue(_tdoc, tidyOptGetIdForName("char-encoding"), "utf8"); //tidyOptSetBool(_tdoc, tidyOptGetIdForName("ascii-chars"), (Bool)1); //tidyOptSetBool(_tdoc, tidyOptGetIdForName("markup"), (Bool)1); //tidyOptSetValue(_tdoc, tidyOptGetIdForName("indent"), "yes"); //tidyOptSetValue(_tdoc, tidyOptGetIdForName("newline"), "\n"); tidyOptSetInt(_tdoc, tidyOptGetIdForName("wrap"), 5000); tidyParseString( _tdoc, contents.c_str() ); /* // tidySaveBuffer doesn't seem to work with the makefile for some reason. TidyBuffer output = {0}; tidySaveBuffer(_tdoc, &output); cout << "3. TidyBuffer size: " << output.size << endl; contents = string((char*)output.bp, (size_t)output.size); */ // tidySaveString is a tricky beast. tmbstr buffer = NULL; uint buflen = 0; int status; do { status = tidySaveString( _tdoc, buffer, &buflen ); if (status == -ENOMEM) { if(buffer) free(buffer); buffer = (tmbstr)malloc(buflen + 1); } } while (status == -ENOMEM); contents = (char*)buffer; } catch (exception& e) { throw e.what(); } }
/*
 * Allocate and initialise a PHPTidyObj of the given class.
 *
 * For is_doc objects a PHPTidyDoc is created as well: a fresh TidyDoc with
 * ref_count 1, not yet initialised, with its own error buffer attached,
 * TidyForceOutput on, TidyMark off, the default configuration applied and
 * the default properties added. is_node objects get no extra state.
 *
 * Returns the embedded zend_object with `handlers` installed.
 */
static zend_object *tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers, tidy_obj_type objtype)
{
	PHPTidyObj *intern;
	PHPTidyDoc *ptdoc;

	intern = ecalloc(1, sizeof(PHPTidyObj) + zend_object_properties_size(class_type));
	zend_object_std_init(&intern->std, class_type);
	object_properties_init(&intern->std, class_type);

	switch (objtype) {
		case is_node:
			break;

		case is_doc:
			ptdoc = emalloc(sizeof(PHPTidyDoc));
			intern->ptdoc = ptdoc;

			ptdoc->doc = tidyCreate();
			ptdoc->ref_count = 1;
			ptdoc->initialized = 0;
			ptdoc->errbuf = emalloc(sizeof(TidyBuffer));
			tidyBufInit(ptdoc->errbuf);

			if (tidySetErrorBuffer(ptdoc->doc, ptdoc->errbuf) != 0) {
				/* Undo every allocation made so far, then raise the error. */
				tidyBufFree(ptdoc->errbuf);
				efree(ptdoc->errbuf);
				tidyRelease(ptdoc->doc);
				efree(intern->ptdoc);
				efree(intern);
				php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
			}

			tidyOptSetBool(intern->ptdoc->doc, TidyForceOutput, yes);
			tidyOptSetBool(intern->ptdoc->doc, TidyMark, no);

			TIDY_SET_DEFAULT_CONFIG(intern->ptdoc->doc);

			tidy_add_default_properties(intern, is_doc);
			break;
	}

	intern->std.handlers = handlers;

	return &intern->std;
}
/*
 * Output handler that runs the complete output through tidy.
 *
 * It only acts when clean_output is enabled and the handler receives the
 * whole output in one call (both START and FINAL flags set). On success
 * the repaired markup is handed to the output layer, which takes ownership
 * of the buffer (out.free = 1).
 *
 * Returns SUCCESS when the output was replaced, FAILURE otherwise.
 */
static int php_tidy_output_handler(void **nothing, php_output_context *output_context)
{
	int status = FAILURE;
	TidyDoc doc;
	TidyBuffer inbuf, outbuf, errbuf;

	if (TG(clean_output) && (output_context->op & PHP_OUTPUT_HANDLER_START) && (output_context->op & PHP_OUTPUT_HANDLER_FINAL)) {
		doc = tidyCreate();
		tidyBufInit(&errbuf);

		if (0 == tidySetErrorBuffer(doc, &errbuf)) {
			tidyOptSetBool(doc, TidyForceOutput, yes);
			tidyOptSetBool(doc, TidyMark, no);

			if (ZEND_SIZE_T_UINT_OVFL(output_context->in.used)) {
				php_error_docref(NULL, E_WARNING, "Input string is too long");
				/* Release the document and the error buffer before bailing
				   out; this early return used to leak both. */
				tidyRelease(doc);
				tidyBufFree(&errbuf);
				return status;
			}

			TIDY_SET_DEFAULT_CONFIG(doc);

			/* The input is attached, not copied. */
			tidyBufInit(&inbuf);
			tidyBufAttach(&inbuf, (byte *) output_context->in.data, (uint)output_context->in.used);

			if (0 <= tidyParseBuffer(doc, &inbuf) && 0 <= tidyCleanAndRepair(doc)) {
				tidyBufInit(&outbuf);
				tidySaveBuffer(doc, &outbuf);
				FIX_BUFFER(&outbuf);
				output_context->out.data = (char *) outbuf.bp;
				output_context->out.used = outbuf.size ? outbuf.size-1 : 0;
				output_context->out.free = 1;
				status = SUCCESS;
			}
		}

		tidyRelease(doc);
		tidyBufFree(&errbuf);
	}

	return status;
}
/*
 * Minimal libtidy demonstration: converts a fixed HTML snippet to XHTML
 * and prints the diagnostics (if any) followed by the result.
 *
 * Returns the last tidy status code: negative on a severe error, otherwise
 * the code returned by tidySaveBuffer().
 */
int main(int argc, char **argv )
{
  const char* input = "<title>Hello</title><p>World!";
  TidyBuffer output = {0};
  TidyBuffer errbuf = {0};
  int rc = -1;
  Bool ok;

  TidyDoc tdoc = tidyCreate();                      /* the "document" */
  printf( "Tidying:\t%s\n", input );

  ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes );   /* convert to XHTML */
  if ( ok ) {
    rc = tidySetErrorBuffer( tdoc, &errbuf );       /* capture diagnostics */
  }
  if ( rc >= 0 ) {
    rc = tidyParseString( tdoc, input );            /* parse the input */
  }
  if ( rc >= 0 ) {
    rc = tidyCleanAndRepair( tdoc );                /* repair */
  }
  if ( rc >= 0 ) {
    rc = tidyRunDiagnostics( tdoc );                /* collect diagnostics */
  }
  if ( rc > 1 && !tidyOptSetBool( tdoc, TidyForceOutput, yes ) ) {
    rc = -1;                                        /* errors: output must be forced */
  }
  if ( rc >= 0 ) {
    rc = tidySaveBuffer( tdoc, &output );           /* pretty print */
  }

  if ( rc < 0 ) {
    printf( "A severe error (%d) occurred.\n", rc );
  } else {
    if ( rc > 0 ) {
      printf( "\nDiagnostics:\n\n%s", errbuf.bp );
    }
    printf( "\nAnd here is the result:\n\n%s", output.bp );
  }

  tidyBufFree( &output );
  tidyBufFree( &errbuf );
  tidyRelease( tdoc );
  return rc;
}
/*
 * Downloads the URL given as the only argument with libcurl, parses the
 * body with tidy and walks the resulting tree with dumpNode(). Tidy's
 * diagnostics, or curl's error text when the transfer fails, go to stderr.
 *
 * Returns 0 after printing the usage line when the argument count is
 * wrong; otherwise the last curl or tidy status code.
 */
int main(int argc, char **argv )
{
  CURL *curl;
  char curl_errbuf[CURL_ERROR_SIZE];
  TidyDoc tdoc;
  TidyBuffer docbuf = {0};
  TidyBuffer tidy_errbuf = {0};
  int err;

  if ( argc != 2 ) {
    printf( "usage: %s <url>\n", argv[0] );
    return(0);
  }

  /* transfer set-up */
  curl = curl_easy_init();
  curl_easy_setopt(curl, CURLOPT_URL, argv[1]);
  curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf);
  curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
  curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);

  /* tidy set-up */
  tdoc = tidyCreate();
  tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */
  tidyOptSetInt(tdoc, TidyWrapLen, 4096);
  tidySetErrorBuffer( tdoc, &tidy_errbuf );
  tidyBufInit(&docbuf);

  /* the write callback receives the tidy buffer */
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &docbuf);
  err = curl_easy_perform(curl);

  if ( err ) {
    fprintf(stderr, "%s\n", curl_errbuf);
  }
  else {
    err = tidyParseBuffer(tdoc, &docbuf);   /* parse the input */
    if ( err >= 0 )
      err = tidyCleanAndRepair(tdoc);       /* fix any problems */
    if ( err >= 0 )
      err = tidyRunDiagnostics(tdoc);       /* load tidy error buffer */
    if ( err >= 0 ) {
      dumpNode( tdoc, tidyGetRoot(tdoc), 0 );    /* walk the tree */
      fprintf(stderr, "%s\n", tidy_errbuf.bp);   /* show errors */
    }
  }

  /* clean-up */
  curl_easy_cleanup(curl);
  tidyBufFree(&docbuf);
  tidyBufFree(&tidy_errbuf);
  tidyRelease(tdoc);
  return(err);
}
void tidy(std::string &input) { TidyBuffer output = {0}; TidyBuffer errbuf = {0}; TidyDoc tdoc = tidyCreate(); tidyOptSetBool(tdoc, TidyXhtmlOut, yes); tidySetErrorBuffer(tdoc, &errbuf); // Capture diagnostics tidyParseString(tdoc, input.c_str()); tidyCleanAndRepair(tdoc); tidySaveBuffer(tdoc, &output); input = std::string((const char*)output.bp); tidyBufFree(&output); tidyBufFree(&errbuf); tidyRelease(tdoc); }
/*
 * Parse `html` with tidy (forced output, wrap 4096), repair it, run the
 * diagnostics and, if every step succeeded, pass the document's <html>
 * node together with `objs` to html_find_objects().
 *
 * NOTE(review): neither the TidyDoc nor the error buffer is released here.
 * This looks like a leak, unless the entries collected in `objs` refer to
 * tidy-owned data — confirm before adding tidyRelease()/tidyBufFree().
 */
void html_parse(const gchar* html, GSList** objs)
{
  TidyDoc tdoc = tidyCreate();
  TidyBuffer tidy_errbuf = {0};
  int err = 0;

  tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */
  tidyOptSetInt(tdoc, TidyWrapLen, 4096);
  tidySetErrorBuffer( tdoc, &tidy_errbuf );

  err = tidyParseString(tdoc, html);   /* parse the input */
  if ( err < 0 )
    return;

  err = tidyCleanAndRepair(tdoc);      /* fix any problems */
  if ( err < 0 )
    return;

  err = tidyRunDiagnostics(tdoc);      /* load tidy error buffer */
  if ( err < 0 )
    return;

  html_find_objects(tidyGetHtml(tdoc), objs);   /* walk the tree */
}
/*
 * Parse the HTML file `filename` with tidy and pass its node tree to
 * parse_html() together with `elem`.
 *
 * When save_relative_links is set and disable_save_tree is not, the same
 * file is opened for writing and handed to parse_html() as output stream;
 * otherwise NULL is passed.
 */
void parse_urls(const char *filename, const url_list_t *elem)
{
	TidyDoc tdoc;
	int err;
	FILE *outfile = NULL;

	tdoc = tidyCreate();

	/* boolean options */
	tidyOptSetBool(tdoc, TidyForceOutput, yes);
	tidyOptSetBool(tdoc, TidyMark, no);
	tidyOptSetBool(tdoc, TidyHideEndTags, yes);
	tidyOptSetBool(tdoc, TidyDropEmptyParas, no);
	tidyOptSetBool(tdoc, TidyJoinStyles, no);
	tidyOptSetBool(tdoc, TidyPreserveEntities, yes);

	/* integer-valued options */
	tidyOptSetInt(tdoc, TidyMergeDivs, no);
	tidyOptSetInt(tdoc, TidyMergeSpans, no);
	tidyOptSetInt(tdoc, TidyWrapLen, 4096);

	tidyOptSetValue(tdoc, TidyCharEncoding, "utf8");
	tidySetReportFilter(tdoc, filter_cb);

	err = tidyParseFile(tdoc, filename);
	if (err >= 0)
		err = tidyCleanAndRepair(tdoc);

	if (err < 0) {
		tidyRelease(tdoc);
		return;
	}

	/* The file is reopened for writing only when the tree is to be saved. */
	if (option_values.save_relative_links && !option_values.disable_save_tree)
		outfile = fopen(filename, "w");

	parse_html(tdoc, tidyGetRoot(tdoc), elem, 1, outfile);

	if (outfile)
		fclose(outfile);

	tidyRelease(tdoc);
}
/**
 * Tidies the HTML in `content` into XHTML, then loads "<cwd>/data/_tmp.xml"
 * with TinyXML and lets a CTiXmlProxyVistor (constructed with &m_array)
 * visit the root element.
 *
 * @param content  HTML text to process
 * @return 0 when the XML file was loaded and visited, -2 when loading failed
 */
int CProxyParse::RunFromMem( wxString content )
{
	char *pBuffer;
	//http://www.51proxied.com/http_non_anonymous.html
	// (commented-out sample input path; original file name translated:
	//  "latest transparent HTTP proxy servers.htm")
	//wxString path = wxT("f:/work/windows/wxUrlRefresh/data/latest transparent HTTP proxy servers.htm");
	//wxString path1 = wxT("f:/work/windows/wxUrlRefresh/data/result.xml");

	// Working directory for the intermediate XML file; created on demand.
	wxString data_path = wxGetCwd() + "/data/";
	wxString path1 = data_path + "_tmp.xml";

	if (!wxDirExists(data_path))
		wxMkdir(data_path);

	// Copy the content into a zero-filled char buffer for tidyParseString.
	// NOTE(review): the buffer holds Length()+1 bytes — confirm that is
	// enough for the narrow form of `content` in this build. The calloc
	// result is not checked before use.
	pBuffer = (char*)calloc(content.Length()+1, 1);
	wxStrncpy(pBuffer, content, content.Len()+1);


	wxLogMessage("Run Tidy!");
	TidyBuffer output;
	TidyBuffer errbuf;
	int rc = -1;
	Bool ok;
	TidyDoc tdoc = tidyCreate();                             // Initialize "document"
	tidyBufInit( &output );
	tidyBufInit( &errbuf );
	//printf( "Tidying:\t\%s\\n", input );
	tidySetCharEncoding(tdoc, "utf8");
	ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes );          // Convert to XHTML
	if ( ok )
		rc = tidySetErrorBuffer( tdoc, &errbuf );            // Capture diagnostics
	if ( rc >= 0 )
		rc = tidyParseString( tdoc, pBuffer );               // Parse the input
	if ( rc >= 0 )
		rc = tidyCleanAndRepair( tdoc );                     // Tidy it up!
	if ( rc >= 0 )
		rc = tidyRunDiagnostics( tdoc );                     // Kvetch
	if ( rc > 1 )                                            // If error, force output.
		rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
	if ( rc >= 0 )
		rc = tidySaveBuffer( tdoc, &output );                // Pretty Print
	if ( rc >= 0 )
	{
		// NOTE(review): the tidied output is written to path1 only when
		// _DEBUG is defined; in other builds the LoadFile() call below
		// reads whatever already exists at that path — confirm intent.
#ifdef _DEBUG
		//if ( rc > 0 )
		//	WriteAllToFile("f:/work/windows/wxUrlRefresh/data/error.xml", (char*)errbuf.bp, errbuf.size);
		WriteAllToFile(path1, (char*)output.bp, output.size);
#endif

	}
	else
		wxLogError("tidyFail");

	// Release tidy's buffers and document, then the input copy.
	tidyBufFree( &output );
	tidyBufFree( &errbuf );
	tidyRelease( tdoc );

	if (pBuffer) free(pBuffer);


	wxLogMessage("Fetch data!");
	// Parse the data
	TiXmlDocument doc(path1);
	if (doc.LoadFile())
	{
		// root
		CTiXmlProxyVistor vistor(&m_array);
		TiXmlElement *pRoot = doc.RootElement();
		// NOTE(review): pRoot is dereferenced without a null check.
		pRoot->Accept(&vistor);
	}
	else
	{
		wxLogMessage("shit");
		return -2;
	}
	return 0;
}
/**
 * Applies one command-line style switch to a tidy document.
 *
 * The switch occupies lpszTidy[nPos .. nPos+nSize) and has the form
 * "-<letters>[<number>]": the letters select the option, and the optional
 * trailing text is converted with _ttoi() (0 when absent). Text that does
 * not start with '-' or is shorter than two characters is ignored, as are
 * unknown option names.
 *
 * @param tdoc      document to configure
 * @param lpszTidy  text containing the switch
 * @param nPos      index of the leading '-'
 * @param nSize     length of the switch, including the '-'
 */
void HTidyInterface::SetTidyControl( TidyDoc tdoc, LPCTSTR lpszTidy, int nPos, int nSize )
{
	// Switches that simply turn a boolean option on.
	static const struct { LPCTSTR name; TidyOptionId id; } kBoolOptions[] =
	{
		{ _T("axd"), TidyXmlDecl },
		{ _T("axs"), TidyXmlSpace },
		{ _T("aan"), TidyAnchorAsName },
		{ _T("axp"), TidyXmlPIs },
		{ _T("b"),   TidyMakeBare },
		{ _T("c"),   TidyMakeClean },
		{ _T("diu"), TidyDecorateInferredUL },
		{ _T("dep"), TidyDropEmptyParas },
		{ _T("dft"), TidyDropFontTags },
		{ _T("dpa"), TidyDropPropAttrs },
		{ _T("ebt"), TidyEncloseBlockText },
		{ _T("et"),  TidyEncloseBodyText },
		{ _T("ec"),  TidyEscapeCdata },
		{ _T("fb"),  TidyFixBackslash },
		{ _T("fbc"), TidyFixComments },
		{ _T("fu"),  TidyFixUri },
		{ _T("hc"),  TidyHideComments },
		{ _T("he"),  TidyHideEndTags },
		{ _T("ic"),  TidyIndentCdata },
		{ _T("ix"),  TidyXmlTags },
		{ _T("jc"),  TidyJoinClasses },
		{ _T("js"),  TidyJoinStyles },
		{ _T("la"),  TidyLiteralAttribs },
		{ _T("le"),  TidyLogicalEmphasis },
		{ _T("ll"),  TidyLowerLiterals },
		{ _T("n"),   TidyNCR },
		{ _T("ne"),  TidyNumEntities },
		{ _T("oh"),  TidyHtmlOut },
		{ _T("ox"),  TidyXhtmlOut },
		{ _T("oxm"), TidyXmlOut },
		{ _T("pe"),  TidyPreserveEntities },
		{ _T("qa"),  TidyQuoteAmpersand },
		{ _T("qm"),  TidyQuoteMarks },
		{ _T("qn"),  TidyQuoteNbsp },
		{ _T("rc"),  TidyReplaceColor },
		{ _T("ua"),  TidyUpperCaseAttrs },
		{ _T("ut"),  TidyUpperCaseTags },
		{ _T("wo"),  TidyWord2000 },
		{ _T("bbb"), TidyBreakBeforeBR },
		{ _T("ia"),  TidyIndentAttributes },
		{ _T("m"),   TidyShowMarkup },
		{ _T("pw"),  TidyPunctWrap },
		{ _T("vs"),  TidyVertSpace },
		{ _T("wa"),  TidyWrapAsp },
		{ _T("wat"), TidyWrapAttVals },
		{ _T("wj"),  TidyWrapJste },
		{ _T("wp"),  TidyWrapPhp },
		{ _T("wsl"), TidyWrapScriptlets },
		{ _T("ws"),  TidyWrapSection },
		{ _T("ac"),  TidyAsciiChars },
		{ _T("sw"),  TidyShowWarnings },
		{ _T("fo"),  TidyForceOutput },
	};

	// Integer options whose value is abs(number - 2) % 3.
	static const struct { LPCTSTR name; TidyOptionId id; } kTriStateOptions[] =
	{
		{ _T("i"),   TidyIndentContent },
		{ _T("md"),  TidyMergeDivs },
		{ _T("ms"),  TidyMergeSpans },
		{ _T("sbo"), TidyBodyOnly },
	};

	// Integer options that take the number unchanged.
	static const struct { LPCTSTR name; TidyOptionId id; } kPlainIntOptions[] =
	{
		{ _T("se"), TidyShowErrors },
		{ _T("is"), TidyIndentSpaces },
		{ _T("ts"), TidyTabSize },
		{ _T("w"),  TidyWrapLen },
	};

	if (lpszTidy[nPos] != '-' || nSize < 2)
	{
		return;
	}

	// Offset (relative to nPos) of the first non-letter after the '-';
	// nSize when the whole switch consists of letters.
	int nNameEnd = nSize;
	for (int nOff = 1; nOff < nSize; ++nOff)
	{
		if (!_istalpha(lpszTidy[nPos + nOff]))
		{
			nNameEnd = nOff;
			break;
		}
	}

	CString strParam(lpszTidy + nPos + 1, nNameEnd - 1);

	int nNumValue = 0;
	if (nNameEnd != nSize)
	{
		CString strNum(lpszTidy + nPos + nNameEnd, nSize - nNameEnd);
		nNumValue = _ttoi(strNum);
	}

	for (unsigned k = 0; k < sizeof(kBoolOptions) / sizeof(kBoolOptions[0]); ++k)
	{
		if (strParam == kBoolOptions[k].name)
		{
			tidyOptSetBool(tdoc, kBoolOptions[k].id, yes);
			return;
		}
	}

	for (unsigned k = 0; k < sizeof(kTriStateOptions) / sizeof(kTriStateOptions[0]); ++k)
	{
		if (strParam == kTriStateOptions[k].name)
		{
			tidyOptSetInt(tdoc, kTriStateOptions[k].id, abs(nNumValue - 2) % 3);
			return;
		}
	}

	for (unsigned k = 0; k < sizeof(kPlainIntOptions) / sizeof(kPlainIntOptions[0]); ++k)
	{
		if (strParam == kPlainIntOptions[k].name)
		{
			tidyOptSetInt(tdoc, kPlainIntOptions[k].id, nNumValue);
			return;
		}
	}

	// Remaining switches each need their own handling.
	CString strNothing;	// used for the text options outside HTML mode

	if (_T("d") == strParam)
	{
		tidyOptSetInt(tdoc, TidyDoctypeMode, nNumValue % 5);
	}
	else if (_T("ra") == strParam)
	{
		tidyOptSetInt(tdoc, TidyDuplicateAttrs, nNumValue % 2);
	}
	else if (_T("sa") == strParam)
	{
		tidyOptSetInt(tdoc, TidySortAttributes, nNumValue % 2);
	}
	else if (_T("ce") == strParam)
	{
		tidySetCharEncoding(tdoc, GetEncodeByIndex(nNumValue));
	}
	else if (_T("ie") == strParam)
	{
		tidySetInCharEncoding(tdoc, GetEncodeByIndex(nNumValue));
	}
	else if (_T("oe") == strParam)
	{
		tidySetOutCharEncoding(tdoc, GetEncodeByIndex(nNumValue));
	}
	else if (_T("at") == strParam)
	{
		tidyOptSetValue(tdoc, TidyAltText, CT2A(m_onlyHtmlXml == SYN_HTML?g_GlobalTidy.m_TidyHtml_at:strNothing));
	}
	else if (_T("cp") == strParam)
	{
		tidyOptSetValue(tdoc, TidyCSSPrefix, CT2A(m_onlyHtmlXml == SYN_HTML?g_GlobalTidy.m_TidyHtml_cp:strNothing));
	}
	else if (_T("nbt") == strParam)
	{
		tidyOptSetValue(tdoc, TidyBlockTags, CT2A(m_onlyHtmlXml == SYN_HTML?g_GlobalTidy.m_TidyHtml_nbt:strNothing));
	}
	else if (_T("net") == strParam)
	{
		tidyOptSetValue(tdoc, TidyEmptyTags, CT2A(m_onlyHtmlXml == SYN_HTML?g_GlobalTidy.m_TidyHtml_net:strNothing));
	}
	else if (_T("nit") == strParam)
	{
		tidyOptSetValue(tdoc, TidyInlineTags, CT2A(m_onlyHtmlXml == SYN_HTML?g_GlobalTidy.m_TidyHtml_nit:strNothing));
	}
	else if (_T("npt") == strParam)
	{
		tidyOptSetValue(tdoc, TidyPreTags, CT2A(m_onlyHtmlXml == SYN_HTML?g_GlobalTidy.m_TidyHtml_npt:strNothing));
	}
}
/*
 * Shared implementation of the "repair" entry points.
 *
 * Parameters (from PHP): the markup or, when is_file is set, a path whose
 * content is loaded first; an optional configuration; an optional
 * encoding; an optional use_include_path flag.
 *
 * Sets the return value to the repaired markup, or to false when the input
 * cannot be read, is too long, cannot be parsed or cannot be repaired.
 */
static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, zend_bool is_file)
{
	char *enc = NULL;
	size_t enc_len = 0;
	zend_bool use_include_path = 0;
	TidyDoc doc;
	TidyBuffer *errbuf;
	zend_string *data, *arg1;
	zval *config = NULL;

	if (is_file) {
		if (zend_parse_parameters(ZEND_NUM_ARGS(), "P|zsb", &arg1, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
			RETURN_FALSE;
		}
		if (!(data = php_tidy_file_to_mem(ZSTR_VAL(arg1), use_include_path))) {
			RETURN_FALSE;
		}
	} else {
		if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|zsb", &arg1, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
			RETURN_FALSE;
		}
		data = arg1;
	}

	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(data))) {
		php_error_docref(NULL, E_WARNING, "Input string is too long");
		/* In file mode `data` was loaded by this function and has to be
		   released here; this early return used to leak it. */
		if (is_file) {
			zend_string_release(data);
		}
		RETURN_FALSE;
	}

	doc = tidyCreate();
	errbuf = emalloc(sizeof(TidyBuffer));
	tidyBufInit(errbuf);

	if (tidySetErrorBuffer(doc, errbuf) != 0) {
		tidyBufFree(errbuf);
		efree(errbuf);
		tidyRelease(doc);
		php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
	}

	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);

	TIDY_SET_DEFAULT_CONFIG(doc);

	if (config) {
		TIDY_APPLY_CONFIG_ZVAL(doc, config);
	}

	if(enc_len) {
		if (tidySetCharEncoding(doc, enc) < 0) {
			php_error_docref(NULL, E_WARNING, "Could not set encoding '%s'", enc);
			RETVAL_FALSE;
		}
	}

	if (data) {
		TidyBuffer buf;

		/* The input is attached, not copied. */
		tidyBufInit(&buf);
		tidyBufAttach(&buf, (byte *) ZSTR_VAL(data), (uint)ZSTR_LEN(data));

		if (tidyParseBuffer(doc, &buf) < 0) {
			php_error_docref(NULL, E_WARNING, "%s", errbuf->bp);
			RETVAL_FALSE;
		} else {
			if (tidyCleanAndRepair(doc) >= 0) {
				TidyBuffer output;
				tidyBufInit(&output);

				tidySaveBuffer (doc, &output);
				FIX_BUFFER(&output);
				RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
				tidyBufFree(&output);
			} else {
				RETVAL_FALSE;
			}
		}
	}

	if (is_file) {
		zend_string_release(data);
	}

	tidyBufFree(errbuf);
	efree(errbuf);
	tidyRelease(doc);
}
/*
 * Set the tidy option `optname` on `doc` from a PHP value.
 *
 * The value is converted to match the option type reported by tidy:
 * string options get a string, integer and boolean options get a long.
 * A notice is raised for unknown or read-only options, a warning for an
 * option type that is none of the three.
 *
 * Returns SUCCESS when tidy accepted the value, FAILURE otherwise.
 */
static int _php_tidy_set_tidy_opt(TidyDoc doc, char *optname, zval *value)
{
	TidyOption opt = tidyGetOptionByName(doc, optname);
	TidyOptionId id;
	zval conv;
	int accepted;

	if (!opt) {
		php_error_docref(NULL, E_NOTICE, "Unknown Tidy Configuration Option '%s'", optname);
		return FAILURE;
	}

	if (tidyOptIsReadOnly(opt)) {
		php_error_docref(NULL, E_NOTICE, "Attempting to set read-only option '%s'", optname);
		return FAILURE;
	}

	/* Work on a shallow copy so the caller's zval is never converted. */
	ZVAL_COPY_VALUE(&conv, value);
	id = tidyOptGetId(opt);

	switch (tidyOptGetType(opt)) {
		case TidyString:
			if (Z_TYPE(conv) != IS_STRING) {
				zval_copy_ctor(&conv);
				convert_to_string(&conv);
			}
			accepted = tidyOptSetValue(doc, id, Z_STRVAL(conv)) ? 1 : 0;
			/* A differing type means conv was converted above and holds
			   its own string, which is destroyed here. */
			if (Z_TYPE(conv) != Z_TYPE_P(value)) {
				zval_dtor(&conv);
			}
			if (accepted) {
				return SUCCESS;
			}
			break;

		case TidyInteger:
			if (Z_TYPE(conv) != IS_LONG) {
				zval_copy_ctor(&conv);
				convert_to_long(&conv);
			}
			if (tidyOptSetInt(doc, id, Z_LVAL(conv))) {
				return SUCCESS;
			}
			break;

		case TidyBoolean:
			if (Z_TYPE(conv) != IS_LONG) {
				zval_copy_ctor(&conv);
				convert_to_long(&conv);
			}
			if (tidyOptSetBool(doc, id, Z_LVAL(conv))) {
				return SUCCESS;
			}
			break;

		default:
			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
			break;
	}

	return FAILURE;
}
void FetchTaskHandler(const FetchTaskMessage &message, const Theron::Address from) { std::cout<<"get data.................."<<std::endl; std::string url=message.fi->url; char curl_errbuf[CURL_ERROR_SIZE]; CURL *curl = curl_easy_init(); int err; fetch::FetchResult *result=new fetch::FetchResult(); fetch::FetchInfo fi=*(message.fi); delete message.fi; result->type=fetch::UNKNOWN; result->url=fi.url; result->pathList=fi.pathList; result->attMap=fi.attMap; int errCode=0; if(curl!=NULL) { curl_easy_setopt(curl, CURLOPT_URL,url.c_str()); curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf); // curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); // curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10); curl_easy_setopt(curl, CURLOPT_NOSIGNAL,1); curl_easy_setopt(curl, CURLOPT_TIMEOUT ,60); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fetch_write); std::stringstream iss; curl_easy_setopt(curl, CURLOPT_WRITEDATA, &iss); err=curl_easy_perform(curl); if ( !err ) { std::map<std::string,std::string>::iterator efit=fi.attMap.find("encode"); char *resStr=new char[iss.str().length()*3]; memset(resStr,0,iss.str().length()*3); if(efit!=fi.attMap.end()) { UErrorCode error = U_ZERO_ERROR; ucnv_convert("UTF-8",efit->second.c_str(),resStr, iss.str().length()*3, iss.str().c_str(), iss.str().length(), &error ); }else { strcpy(resStr,iss.str().c_str()); } TidyDoc tdoc; // TidyBuffer tidy_errbuf = {0}; // TidyBuffer docbuf = {0}; tdoc = tidyCreate(); tidyOptSetInt(tdoc, TidyWrapLen, 4096); // tidySetErrorBuffer( tdoc, &tidy_errbuf ); tidyOptSetBool( tdoc, TidyXmlOut, yes ); tidyOptSetBool(tdoc, TidyQuoteNbsp, no); tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */ // tidyOptSetBool( tdoc, TidyXmlDecl, yes ); tidyOptSetBool(tdoc, TidyQuiet, yes); tidyOptSetBool(tdoc, TidyShowWarnings, no); tidyOptSetValue(tdoc,TidyDoctype,"omit"); tidyOptSetBool(tdoc, TidyFixBackslash, yes); tidyOptSetBool(tdoc, TidyMark, no); 
tidySetCharEncoding(tdoc,"utf8"); // tidyBufInit(&docbuf); // err = tidyParseBuffer(tdoc, &docbuf); err = tidyParseString(tdoc, resStr); if ( err >= 0 ) { err = tidyCleanAndRepair(tdoc); /* fix any problems */ if ( err >= 0 ) { // err=tidyRunDiagnostics(tdoc); /* load tidy error buffer */ // if ( err >= 0 ) { // std::cout<<"tidy error:"<<tidy_errbuf.bp<<std::endl; /* show errors */ TidyBuffer outbuf = {0}; tidyBufInit(&outbuf); tidySaveBuffer( tdoc, &outbuf ); std::stringstream hss; hss<<(char*)outbuf.bp; tidyBufFree(&outbuf); result->result=hss.str(); // } // else // { // errCode=-5; // } }else { errCode=-4; } }else { errCode=-3; } // tidyBufFree(&tidy_errbuf); // tidyBufFree(&docbuf); tidyRelease(tdoc); delete [] resStr; }else { errCode=-2; } }else { errCode=-1; } if(errCode<0) { std::stringstream ess; ess<<errCode; result->type=fetch::ERROR; result->result=ess.str(); } curl_easy_cleanup(curl); Send(FetchResultMessage(result), from); }
/*
 * Command-line entry point: configures a TidyDoc from config files and
 * command-line options, then parses, repairs and writes each input file
 * (or stdin when no file is given).
 *
 * Configuration sources, in the order they are applied:
 *   1. TIDY_CONFIG_FILE, if defined at build time and the file exists;
 *   2. the file named by the HTML_TIDY environment variable, otherwise
 *      TIDY_USER_CONFIG_FILE (if defined at build time and it exists);
 *   3. command-line options.
 *
 * Return value: 0 for the help/version style options; otherwise 2 if any
 * content errors were counted, 1 if any content warnings were counted,
 * else 0.
 */
int main( int argc, char** argv )
{
    ctmbstr prog = argv[0];
    ctmbstr cfgfil = NULL, errfil = NULL, htmlfil = NULL;
    TidyDoc tdoc = tidyCreate();
    int status = 0;

    /* Totals accumulated over every document processed.  accessWarnings is
       accumulated but not read again in this function. */
    uint contentErrors = 0;
    uint contentWarnings = 0;
    uint accessWarnings = 0;

    errout = stderr; /* initialize to stderr */
    status = 0;

#ifdef TIDY_CONFIG_FILE
    if ( tidyFileExists( tdoc, TIDY_CONFIG_FILE) )
    {
        status = tidyLoadConfig( tdoc, TIDY_CONFIG_FILE );
        if ( status != 0 )
            fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", TIDY_CONFIG_FILE, status);
    }
#endif /* TIDY_CONFIG_FILE */

    /* look for env var "HTML_TIDY" */
    /* then for ~/.tidyrc (on platforms defining $HOME) */

    if ( (cfgfil = getenv("HTML_TIDY")) != NULL )
    {
        status = tidyLoadConfig( tdoc, cfgfil );
        if ( status != 0 )
            fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", cfgfil, status);
    }
#ifdef TIDY_USER_CONFIG_FILE
    else if ( tidyFileExists( tdoc, TIDY_USER_CONFIG_FILE) )
    {
        status = tidyLoadConfig( tdoc, TIDY_USER_CONFIG_FILE );
        if ( status != 0 )
            fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", TIDY_USER_CONFIG_FILE, status);
    }
#endif /* TIDY_USER_CONFIG_FILE */

    /* read command line */
    /* argv[1] is always the argument being examined; argc/argv are shifted by
       one after each option (and by one more when the option consumed a
       value from argv[2]). */
    while ( argc > 0 )
    {
        if (argc > 1 && argv[1][0] == '-')
        {
            /* support -foo and --foo */
            /* arg skips only the first '-', so "--foo" is compared as "-foo" */
            ctmbstr arg = argv[1] + 1;

            if ( strcasecmp(arg, "xml") == 0)
                tidyOptSetBool( tdoc, TidyXmlTags, yes );

            else if ( strcasecmp(arg, "asxml") == 0 ||
                      strcasecmp(arg, "asxhtml") == 0 )
            {
                tidyOptSetBool( tdoc, TidyXhtmlOut, yes );
            }
            else if ( strcasecmp(arg, "ashtml") == 0 )
                tidyOptSetBool( tdoc, TidyHtmlOut, yes );

            else if ( strcasecmp(arg, "indent") == 0 )
            {
                tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
                /* an indent width of 0 is put back to its default value */
                if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
                    tidyOptResetToDefault( tdoc, TidyIndentSpaces );
            }
            else if ( strcasecmp(arg, "omit") == 0 )
                tidyOptSetBool( tdoc, TidyHideEndTags, yes );

            else if ( strcasecmp(arg, "upper") == 0 )
                tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );

            else if ( strcasecmp(arg, "clean") == 0 )
                tidyOptSetBool( tdoc, TidyMakeClean, yes );

            else if ( strcasecmp(arg, "bare") == 0 )
                tidyOptSetBool( tdoc, TidyMakeBare, yes );

            /* character encoding names: the option name itself is passed on
               as the encoding */
            else if ( strcasecmp(arg, "raw") == 0 ||
                      strcasecmp(arg, "ascii") == 0 ||
                      strcasecmp(arg, "latin0") == 0 ||
                      strcasecmp(arg, "latin1") == 0 ||
                      strcasecmp(arg, "utf8") == 0 ||
#ifndef NO_NATIVE_ISO2022_SUPPORT
                      strcasecmp(arg, "iso2022") == 0 ||
#endif
#if SUPPORT_UTF16_ENCODINGS
                      strcasecmp(arg, "utf16le") == 0 ||
                      strcasecmp(arg, "utf16be") == 0 ||
                      strcasecmp(arg, "utf16") == 0 ||
#endif
#if SUPPORT_ASIAN_ENCODINGS
                      strcasecmp(arg, "shiftjis") == 0 ||
                      strcasecmp(arg, "big5") == 0 ||
#endif
                      strcasecmp(arg, "mac") == 0 ||
                      strcasecmp(arg, "win1252") == 0 ||
                      strcasecmp(arg, "ibm858") == 0 )
            {
                tidySetCharEncoding( tdoc, arg );
            }
            else if ( strcasecmp(arg, "numeric") == 0 )
                tidyOptSetBool( tdoc, TidyNumEntities, yes );

            else if ( strcasecmp(arg, "modify") == 0 ||
                      strcasecmp(arg, "change") == 0 || /* obsolete */
                      strcasecmp(arg, "update") == 0 ) /* obsolete */
            {
                tidyOptSetBool( tdoc, TidyWriteBack, yes );
            }
            else if ( strcasecmp(arg, "errors") == 0 )
                tidyOptSetBool( tdoc, TidyShowMarkup, no );

            else if ( strcasecmp(arg, "quiet") == 0 )
                tidyOptSetBool( tdoc, TidyQuiet, yes );

            /* The informational options below release the document and exit
               immediately with status 0. */
            else if ( strcasecmp(arg, "help") == 0 ||
                      strcasecmp(arg, "h") == 0 || *arg == '?' )
            {
                help( prog );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "xml-help") == 0)
            {
                xml_help( );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "help-config") == 0 )
            {
                optionhelp( tdoc );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "xml-config") == 0 )
            {
                XMLoptionhelp( tdoc );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "show-config") == 0 )
            {
                optionvalues( tdoc );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            /* Options taking a value read it from argv[2]; when argv[2] is
               missing (argc < 3) the option is silently ignored. */
            else if ( strcasecmp(arg, "config") == 0 )
            {
                if ( argc >= 3 )
                {
                    ctmbstr post;

                    tidyLoadConfig( tdoc, argv[2] );

                    /* Set new error output stream if setting changed */
                    post = tidyOptGetValue( tdoc, TidyErrFile );
                    if ( post && (!errfil || !samefile(errfil, post)) )
                    {
                        errfil = post;
                        errout = tidySetErrorFile( tdoc, post );
                    }

                    --argc;
                    ++argv;
                }
            }

#if SUPPORT_ASIAN_ENCODINGS
            else if ( strcasecmp(arg, "language") == 0 ||
                      strcasecmp(arg, "lang") == 0 )
            {
                if ( argc >= 3 )
                {
                    tidyOptSetValue( tdoc, TidyLanguage, argv[2] );
                    --argc;
                    ++argv;
                }
            }
#endif

            else if ( strcasecmp(arg, "output") == 0 ||
                      strcasecmp(arg, "-output-file") == 0 ||
                      strcasecmp(arg, "o") == 0 )
            {
                if ( argc >= 3 )
                {
                    tidyOptSetValue( tdoc, TidyOutFile, argv[2] );
                    --argc;
                    ++argv;
                }
            }
            else if ( strcasecmp(arg, "file") == 0 ||
                      strcasecmp(arg, "-file") == 0 ||
                      strcasecmp(arg, "f") == 0 )
            {
                if ( argc >= 3 )
                {
                    errfil = argv[2];
                    errout = tidySetErrorFile( tdoc, errfil );
                    --argc;
                    ++argv;
                }
            }
            else if ( strcasecmp(arg, "wrap") == 0 ||
                      strcasecmp(arg, "-wrap") == 0 ||
                      strcasecmp(arg, "w") == 0 )
            {
                if ( argc >= 3 )
                {
                    /* TidyWrapLen is set even when argv[2] is not a number
                       (wraplen stays 0); argv[2] is consumed only when a
                       number was parsed, so otherwise it is examined again
                       as the next argument. */
                    uint wraplen = 0;
                    int nfields = sscanf( argv[2], "%u", &wraplen );
                    tidyOptSetInt( tdoc, TidyWrapLen, wraplen );
                    if (nfields > 0)
                    {
                        --argc;
                        ++argv;
                    }
                }
            }
            else if ( strcasecmp(arg, "version") == 0 ||
                      strcasecmp(arg, "-version") == 0 ||
                      strcasecmp(arg, "v") == 0 )
            {
                version();
                tidyRelease( tdoc );
                return 0; /* success */
            }
            /* Any other "--name value" pair is handed to the config parser.
               NOTE(review): argc is not checked here, so argv[2] is NULL
               when the option is the last argument -- confirm that
               tidyOptParseValue tolerates a NULL value. */
            else if ( strncmp(argv[1], "--", 2 ) == 0)
            {
                if ( tidyOptParseValue(tdoc, argv[1]+2, argv[2]) )
                {
                    /* Set new error output stream if setting changed */
                    ctmbstr post = tidyOptGetValue( tdoc, TidyErrFile );
                    if ( post && (!errfil || !samefile(errfil, post)) )
                    {
                        errfil = post;
                        errout = tidySetErrorFile( tdoc, post );
                    }

                    ++argv;
                    --argc;
                }
            }

#if SUPPORT_ACCESSIBILITY_CHECKS
            else if ( strcasecmp(arg, "access") == 0 )
            {
                if ( argc >= 3 )
                {
                    /* same consume-only-if-numeric rule as "wrap" above */
                    uint acclvl = 0;
                    int nfields = sscanf( argv[2], "%u", &acclvl );
                    tidyOptSetInt( tdoc, TidyAccessibilityCheckLevel, acclvl );
                    if (nfields > 0)
                    {
                        --argc;
                        ++argv;
                    }
                }
            }
#endif

            else
            {
                /* Anything else is treated as a cluster of single-letter
                   flags, e.g. "-icq"; each character after the '-' is
                   handled in turn. */
                uint c;
                ctmbstr s = argv[1];

                while ( (c = *++s) != '\0' )
                {
                    switch ( c )
                    {
                    case 'i':
                        tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
                        if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
                            tidyOptResetToDefault( tdoc, TidyIndentSpaces );
                        break;

                    /* Usurp -o for output file. Anyone hiding end tags?
                    case 'o':
                        tidyOptSetBool( tdoc, TidyHideEndTags, yes );
                        break;
                    */

                    case 'u':
                        tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
                        break;

                    case 'c':
                        tidyOptSetBool( tdoc, TidyMakeClean, yes );
                        break;

                    case 'b':
                        tidyOptSetBool( tdoc, TidyMakeBare, yes );
                        break;

                    case 'n':
                        tidyOptSetBool( tdoc, TidyNumEntities, yes );
                        break;

                    case 'm':
                        tidyOptSetBool( tdoc, TidyWriteBack, yes );
                        break;

                    case 'e':
                        tidyOptSetBool( tdoc, TidyShowMarkup, no );
                        break;

                    case 'q':
                        tidyOptSetBool( tdoc, TidyQuiet, yes );
                        break;

                    default:
                        unknownOption( c );
                        break;
                    }
                }
            }

            /* shift past the option just handled and look at the next one */
            --argc;
            ++argv;
            continue;
        }

        /* argv[1], if present, is not an option: treat it as an input file;
           with no argument left, read the document from stdin. */
        if ( argc > 1 )
        {
            htmlfil = argv[1];
            if ( tidyOptGetBool(tdoc, TidyEmacs) )
                tidyOptSetValue( tdoc, TidyEmacsFile, htmlfil );
            status = tidyParseFile( tdoc, htmlfil );
        }
        else
        {
            htmlfil = "stdin";
            status = tidyParseStdin( tdoc );
        }

        /* each stage runs only if the previous one did not return < 0 */
        if ( status >= 0 )
            status = tidyCleanAndRepair( tdoc );

        if ( status >= 0 )
            status = tidyRunDiagnostics( tdoc );

        if ( status > 1 ) /* If errors, do we want to force output? */
            status = ( tidyOptGetBool(tdoc, TidyForceOutput) ? status : -1 );

        if ( status >= 0 && tidyOptGetBool(tdoc, TidyShowMarkup) )
        {
            /* Destination: the input file itself when write-back is on and a
               file name was given, else the configured output file, else
               stdout. */
            if ( tidyOptGetBool(tdoc, TidyWriteBack) && argc > 1 )
                status = tidySaveFile( tdoc, htmlfil );
            else
            {
                ctmbstr outfil = tidyOptGetValue( tdoc, TidyOutFile );
                if ( outfil )
                    status = tidySaveFile( tdoc, outfil );
                else
                    status = tidySaveStdout( tdoc );
            }
        }

        contentErrors += tidyErrorCount( tdoc );
        contentWarnings += tidyWarningCount( tdoc );
        accessWarnings += tidyAccessWarningCount( tdoc );

        /* move on to the next input file, if any */
        --argc;
        ++argv;

        if ( argc <= 1 )
            break;
    }

    if (!tidyOptGetBool(tdoc, TidyQuiet) &&
        errout == stderr && !contentErrors)
        fprintf(errout, "\n");

    if (contentErrors + contentWarnings > 0 &&
        !tidyOptGetBool(tdoc, TidyQuiet))
        tidyErrorSummary(tdoc);

    if (!tidyOptGetBool(tdoc, TidyQuiet))
        tidyGeneralInfo(tdoc);

    /* called to free hash tables etc. */
    tidyRelease( tdoc );

    /* return status can be used by scripts */
    if ( contentErrors > 0 )
        return 2;

    if ( contentWarnings > 0 )
        return 1;

    /* 0 signifies all is ok */
    return 0;
}
bool TidyReader::openFile (const char * szFilename) { UT_DEBUGMSG(("using libtidy to parse HTML...\n")); m_tidy = tidyCreate (); if (m_tidy == 0) return false; if (tidyOptSetBool (m_tidy, TidyXhtmlOut, yes) == 0) { UT_DEBUGMSG(("tidyOptSetBool failed!\n")); closeFile (); return false; } #ifndef DEBUG tidySetErrorBuffer (m_tidy, &m_errbuf); #endif int parse_status; if (m_buffer && m_length) { UT_DEBUGMSG(("parse HTML in buffer...\n")); UT_Byte * buffer = const_cast<UT_Byte *>(m_buffer); // grr. TidyBuffer inbuf; tidyBufInit (&inbuf); tidyBufAttach (&inbuf, buffer, static_cast<unsigned int>(m_length)); parse_status = tidyParseBuffer (m_tidy, &inbuf); tidyBufDetach (&inbuf); } else { UT_DEBUGMSG(("parse HTML in file: %s\n",szFilename)); parse_status = tidyParseFile (m_tidy, szFilename); } if (parse_status < 0) { UT_DEBUGMSG(("tidyParseBuffer/File failed!\n")); closeFile (); return false; } parse_status = tidyCleanAndRepair (m_tidy); if (parse_status < 0) { UT_DEBUGMSG(("tidyCleanAndRepair failed!\n")); closeFile (); return false; } parse_status = tidyRunDiagnostics (m_tidy); if (parse_status < 0) { UT_DEBUGMSG(("tidyRunDiagnostics failed!\n")); closeFile (); return false; } if (parse_status > 1) { parse_status = (tidyOptSetBool (m_tidy, TidyForceOutput, yes) ? parse_status : -1); } if (parse_status < 0) { UT_DEBUGMSG(("tidyOptSetBool failed!\n")); closeFile (); return false; } parse_status = tidySaveBuffer (m_tidy, &m_outbuf); if (parse_status < 0) { UT_DEBUGMSG(("tidySaveBuffer failed!\n")); closeFile (); return false; } UT_DEBUGMSG(("tidy succeeded!\n")); #ifdef DEBUG fputs ("================================================================\n", stderr); fputs ((const char *) m_outbuf.bp, stderr); fputs ("================================================================\n", stderr); #endif m_outbuf.next = 0; return true; }
/*
 * fixup(text [, encoding]) -> (xhtml, errors)
 *
 * Runs `text` through HTML Tidy and returns a 2-tuple of strings: the
 * XHTML output and the contents of Tidy's error buffer.
 *
 * If `encoding` is given it is used for both input and output; otherwise
 * Tidy's default input encoding is used and the output is UTF-8.
 *
 * Returns NULL with IOError set when a Tidy call fails, or with whatever
 * exception the string/tuple construction raised.
 */
static PyObject*
elementtidy_fixup(PyObject* self, PyObject* args)
{
    int rc;
    TidyDoc doc;
    TidyBuffer out = {0};
    TidyBuffer err = {0};
    PyObject* pyout;
    PyObject* pyerr;

    char* text;
    char* encoding = NULL;
    if (!PyArg_ParseTuple(args, "s|s:fixup", &text, &encoding))
        return NULL;

    doc = tidyCreate();

    /* options for nice XHTML output */
    if (encoding)
        /* if an encoding is given, use it for both input and output */
        tidyOptSetValue(doc, TidyCharEncoding, encoding);
    else
        /* if no encoding is given, use default input and utf-8 output */
        tidyOptSetValue(doc, TidyOutCharEncoding, "utf8");
    tidyOptSetBool(doc, TidyForceOutput, yes);
    tidyOptSetInt(doc, TidyWrapLen, 0);
    tidyOptSetBool(doc, TidyQuiet, yes);
    tidyOptSetBool(doc, TidyXhtmlOut, yes);
    tidyOptSetBool(doc, TidyXmlDecl, yes);
    tidyOptSetInt(doc, TidyIndentContent, 0);
    tidyOptSetBool(doc, TidyNumEntities, yes);

    rc = tidySetErrorBuffer(doc, &err);
    if (rc < 0) {
        PyErr_SetString(PyExc_IOError, "tidySetErrorBuffer failed");
        goto error;
    }

    rc = tidyParseString(doc, text);
    if (rc < 0) {
        PyErr_SetString(PyExc_IOError, "tidyParseString failed");
        goto error;
    }

    rc = tidyCleanAndRepair(doc);
    if (rc < 0) {
        PyErr_SetString(PyExc_IOError, "tidyCleanAndRepair failed");
        goto error;
    }

    rc = tidyRunDiagnostics(doc);
    if (rc < 0) {
        PyErr_SetString(PyExc_IOError, "tidyRunDiagnostics failed");
        goto error;
    }

    rc = tidySaveBuffer(doc, &out);
    if (rc < 0) {
        /* report the call that actually failed (was a copy of the
           tidyRunDiagnostics message) */
        PyErr_SetString(PyExc_IOError, "tidySaveBuffer failed");
        goto error;
    }

    /* TidyBuffer.bp is a byte pointer and is NULL for an empty buffer */
    pyout = PyString_FromString(out.bp ? (const char*) out.bp : "");
    if (!pyout)
        goto error;
    pyerr = PyString_FromString(err.bp ? (const char*) err.bp : "");
    if (!pyerr) {
        Py_DECREF(pyout);
        goto error;
    }

    tidyBufFree(&out);
    tidyBufFree(&err);

    tidyRelease(doc);

    /* "N" hands both references over to the tuple */
    return Py_BuildValue("NN", pyout, pyerr);

  error:
    tidyBufFree(&out);
    tidyBufFree(&err);

    tidyRelease(doc);

    return NULL;
}
void CCFHtmlTidy::InitTidyDefault() { TidyDoc formatter = tidyImplToDoc(tidy); tidyOptSetBool(formatter, TidyShowMarkup, no); tidyOptSetBool(formatter, TidyShowWarnings, no); tidyOptSetBool(formatter, TidyDropEmptyParas, no); tidyOptSetBool(formatter, TidyFixComments, no); tidyOptSetBool(formatter, TidyQuoteNbsp, no); tidyOptSetBool(formatter, TidyQuoteAmpersand, no); tidyOptSetBool(formatter, TidyWrapSection, no); tidyOptSetBool(formatter, TidyWrapAsp, no); tidyOptSetBool(formatter, TidyWrapJste, no); tidyOptSetBool(formatter, TidyWrapPhp, no); tidyOptSetBool(formatter, TidyFixBackslash, no); tidyOptSetBool(formatter, TidyMark, no); tidyOptSetBool(formatter, TidyFixUri, no); tidyOptSetBool(formatter, TidyLowerLiterals, no); tidyOptSetBool(formatter, TidyJoinStyles, no); tidyOptSetBool(formatter, TidyNCR, no); tidyOptSetBool(formatter, TidyAnchorAsName, no); tidyOptSetInt(formatter, TidyNewline, 0); }
void CCFHtmlTidy::SetTidyProp(const std::string& strParam, int nNumValue, const std::string& /*strNumValue*/, const std::string& strTextValue) { TidyDoc formatter = tidyImplToDoc(tidy); if ("axd" == strParam) { tidyOptSetBool(formatter, TidyXmlDecl, yes); } else if ("axs" == strParam) { tidyOptSetBool(formatter, TidyXmlSpace, yes); } else if ("aan" == strParam) { tidyOptSetBool(formatter, TidyAnchorAsName, yes); } else if ("axp" == strParam) { tidyOptSetBool(formatter, TidyXmlPIs, yes); } else if ("b" == strParam) { tidyOptSetBool(formatter, TidyMakeBare, yes); } else if ("c" == strParam) { tidyOptSetBool(formatter, TidyMakeClean, yes); } else if ("diu" == strParam) { tidyOptSetBool(formatter, TidyDecorateInferredUL, yes); } else if ("dep" == strParam) { tidyOptSetBool(formatter, TidyDropEmptyParas, yes); } else if ("dft" == strParam) { tidyOptSetBool(formatter, TidyDropFontTags, yes); } else if ("dpa" == strParam) { tidyOptSetBool(formatter, TidyDropPropAttrs, yes); } else if ("ebt" == strParam) { tidyOptSetBool(formatter, TidyEncloseBlockText, yes); } else if ("et" == strParam) { tidyOptSetBool(formatter, TidyEncloseBodyText, yes); } else if ("ec" == strParam) { tidyOptSetBool(formatter, TidyEscapeCdata, yes); } else if ("fb" == strParam) { tidyOptSetBool(formatter, TidyFixBackslash, yes); } else if ("fbc" == strParam) { tidyOptSetBool(formatter, TidyFixComments, yes); } else if ("fu" == strParam) { tidyOptSetBool(formatter, TidyFixUri, yes); } else if ("hc" == strParam) { tidyOptSetBool(formatter, TidyHideComments, yes); } else if ("he" == strParam) { tidyOptSetBool(formatter, TidyHideEndTags, yes); } else if ("ic" == strParam) { tidyOptSetBool(formatter, TidyIndentCdata, yes); } else if ("ix" == strParam) { tidyOptSetBool(formatter, TidyXmlTags, yes); } else if ("jc" == strParam) { tidyOptSetBool(formatter, TidyJoinClasses, yes); } else if ("js" == strParam) { tidyOptSetBool(formatter, TidyJoinStyles, yes); } else if ("la" == strParam) { 
tidyOptSetBool(formatter, TidyLiteralAttribs, yes); } else if ("le" == strParam) { tidyOptSetBool(formatter, TidyLogicalEmphasis, yes); } else if ("ll" == strParam) { tidyOptSetBool(formatter, TidyLowerLiterals, yes); } else if ("n" == strParam) { tidyOptSetBool(formatter, TidyNCR, yes); } else if ("ne" == strParam) { tidyOptSetBool(formatter, TidyNumEntities, yes); } else if ("oh" == strParam) { tidyOptSetBool(formatter, TidyHtmlOut, yes); } else if ("ox" == strParam) { tidyOptSetBool(formatter, TidyXhtmlOut, yes); } else if ("oxm" == strParam) { tidyOptSetBool(formatter, TidyXmlOut, yes); } else if ("pe" == strParam) { tidyOptSetBool(formatter, TidyPreserveEntities, yes); } else if ("qa" == strParam) { tidyOptSetBool(formatter, TidyQuoteAmpersand, yes); } else if ("qm" == strParam) { tidyOptSetBool(formatter, TidyQuoteMarks, yes); } else if ("qn" == strParam) { tidyOptSetBool(formatter, TidyQuoteNbsp, yes); } else if ("rc" == strParam) { tidyOptSetBool(formatter, TidyReplaceColor, yes); } else if ("ua" == strParam) { tidyOptSetBool(formatter, TidyUpperCaseAttrs, yes); } else if ("ut" == strParam) { tidyOptSetBool(formatter, TidyUpperCaseTags, yes); } else if ("wo" == strParam) { tidyOptSetBool(formatter, TidyWord2000, yes); } else if ("bbb" == strParam) { tidyOptSetBool(formatter, TidyBreakBeforeBR, yes); } else if ("ia" == strParam) { tidyOptSetBool(formatter, TidyIndentAttributes, yes); } else if ("m" == strParam) { tidyOptSetBool(formatter, TidyShowMarkup, yes); } else if ("pw" == strParam) { tidyOptSetBool(formatter, TidyPunctWrap, yes); } else if ("vs" == strParam) { tidyOptSetBool(formatter, TidyVertSpace, yes); } else if ("wa" == strParam) { tidyOptSetBool(formatter, TidyWrapAsp, yes); } else if ("wat" == strParam) { tidyOptSetBool(formatter, TidyWrapAttVals, yes); } else if ("wj" == strParam) { tidyOptSetBool(formatter, TidyWrapJste, yes); } else if ("wp" == strParam) { tidyOptSetBool(formatter, TidyWrapPhp, yes); } else if ("wsl" == strParam) { 
tidyOptSetBool(formatter, TidyWrapScriptlets, yes); } else if ("ws" == strParam) { tidyOptSetBool(formatter, TidyWrapSection, yes); } else if ("ac" == strParam) { tidyOptSetBool(formatter, TidyAsciiChars, yes); } else if ("sw" == strParam) { tidyOptSetBool(formatter, TidyShowWarnings, yes); } else if ("fo" == strParam) { tidyOptSetBool(formatter, TidyForceOutput, yes); } else if ("i" == strParam) { tidyOptSetInt(formatter, TidyIndentContent, abs(nNumValue - 2) % 3); } else if ("md" == strParam) { tidyOptSetInt(formatter, TidyMergeDivs, abs(nNumValue - 2) % 3); } else if ("ms" == strParam) { tidyOptSetInt(formatter, TidyMergeSpans, abs(nNumValue - 2) % 3); } else if ("sbo" == strParam) { tidyOptSetInt(formatter, TidyBodyOnly, abs(nNumValue - 2) % 3); } else if ("d" == strParam) { tidyOptSetInt(formatter, TidyDoctypeMode, nNumValue % 5); } else if ("du" == strParam) { tidyOptSetValue(formatter, TidyDoctype, strTextValue.c_str()); } else if ("ra" == strParam) { tidyOptSetInt(formatter, TidyDuplicateAttrs, nNumValue % 2); } else if ("sa" == strParam) { tidyOptSetInt(formatter, TidySortAttributes, nNumValue % 2); } else if ("ce" == strParam) { tidySetCharEncoding(formatter, TY_(GetEncodingOptNameFromTidyId)(nNumValue)); } else if ("ie" == strParam) { tidySetInCharEncoding(formatter, TY_(GetEncodingOptNameFromTidyId)(nNumValue)); } else if ("oe" == strParam) { tidySetOutCharEncoding(formatter, TY_(GetEncodingOptNameFromTidyId)(nNumValue)); } else if ("se" == strParam) { tidyOptSetInt(formatter, TidyShowErrors, nNumValue); } else if ("is" == strParam) { tidyOptSetInt(formatter, TidyIndentSpaces, nNumValue); } else if ("ts" == strParam) { tidyOptSetInt(formatter, TidyTabSize, nNumValue); } else if ("w" == strParam) { tidyOptSetInt(formatter, TidyWrapLen, nNumValue); } else if ("at" == strParam) { tidyOptSetValue(formatter, TidyAltText, strTextValue.c_str()); } else if ("cp" == strParam) { tidyOptSetValue(formatter, TidyCSSPrefix, strTextValue.c_str()); } else if ("nbt" == 
strParam) { tidyOptSetValue(formatter, TidyBlockTags, strTextValue.c_str()); } else if ("net" == strParam) { tidyOptSetValue(formatter, TidyEmptyTags, strTextValue.c_str()); } else if ("nit" == strParam) { tidyOptSetValue(formatter, TidyInlineTags, strTextValue.c_str()); } else if ("npt" == strParam) { tidyOptSetValue(formatter, TidyPreTags, strTextValue.c_str()); } }
bool nuiHTML::Load(nglIStream& rStream, nglTextEncoding OverrideContentsEncoding, const nglString& rSourceURL) { if (!rSourceURL.IsEmpty()) SetSourceURL(rSourceURL); int res = -1; nglTextEncoding encoding = eUTF8; TidyDoc tdoc = NULL; { HTMLStream strm(rStream); tdoc = tidyCreate(); tidyOptSetBool(tdoc, TidyShowMarkup, no); tidyOptSetBool(tdoc, TidyShowWarnings, no); tidyOptSetInt(tdoc, TidyShowErrors, 0); tidyOptSetBool(tdoc, TidyQuiet, yes); tidySetCharEncoding(tdoc, "utf8"); TidyInputSource source; tidyInitSource( &source, &strm, &HTMLStream::TidyGetByte, &HTMLStream::TidyUngetByte, &HTMLStream::TidyEOF); res = tidyParseSource(tdoc, &source); if ( res >= 0 ) res = tidyCleanAndRepair(tdoc); // Tidy it up! if ( res >= 0 ) res = tidyRunDiagnostics(tdoc); // Kvetch if (OverrideContentsEncoding == eEncodingUnknown) { nglString encoding_string(GetEncodingString(tidyGetRoot(tdoc))); //ascii, latin1, raw, utf8, iso2022, mac, win1252, utf16le, utf16be, utf16, big5 shiftjis encoding = nuiGetTextEncodingFromString(encoding_string); } else { encoding = OverrideContentsEncoding; } } char* pStr = NULL; if (encoding != eUTF8) { // Release the doc to create a new one tidyRelease(tdoc); nglOMemory omem; rStream.SetPos(0, eStreamFromStart); rStream.PipeTo(omem); nglString decoded; decoded.Import(omem.GetBufferData(), omem.GetSize(), encoding); pStr = decoded.Export(eUTF8); nglIMemory imem(pStr, strlen(pStr)); HTMLStream strm(imem); tdoc = tidyCreate(); tidySetCharEncoding(tdoc, "utf8"); TidyInputSource source; tidyInitSource( &source, &strm, &HTMLStream::TidyGetByte, &HTMLStream::TidyUngetByte, &HTMLStream::TidyEOF); res = tidyParseSource(tdoc, &source); if ( res >= 0 ) res = tidyCleanAndRepair(tdoc); // Tidy it up! if ( res >= 0 ) res = tidyRunDiagnostics(tdoc); // Kvetch } BuildTree(tdoc, tidyGetRoot(tdoc), eUTF8, mComputeStyle); tidyRelease(tdoc); if (pStr) free(pStr); return res < 2; }
// Switches off the listed boolean options on tdoc. The settings are
// independent of each other, so they are grouped by option family.
void HTidyInterface::InitTidyDefault( TidyDoc tdoc )
{
    // Show* / Mark
    tidyOptSetBool(tdoc, TidyShowMarkup, no);
    tidyOptSetBool(tdoc, TidyShowWarnings, no);
    tidyOptSetBool(tdoc, TidyMark, no);

    // Wrap*
    tidyOptSetBool(tdoc, TidyWrapSection, no);
    tidyOptSetBool(tdoc, TidyWrapAsp, no);
    tidyOptSetBool(tdoc, TidyWrapJste, no);
    tidyOptSetBool(tdoc, TidyWrapPhp, no);

    // Quote* / NCR
    tidyOptSetBool(tdoc, TidyQuoteNbsp, no);
    tidyOptSetBool(tdoc, TidyQuoteAmpersand, no);
    tidyOptSetBool(tdoc, TidyNCR, no);

    // Fix*
    tidyOptSetBool(tdoc, TidyFixComments, no);
    tidyOptSetBool(tdoc, TidyFixBackslash, no);
    tidyOptSetBool(tdoc, TidyFixUri, no);

    // Remaining markup options
    tidyOptSetBool(tdoc, TidyDropEmptyParas, no);
    tidyOptSetBool(tdoc, TidyLowerLiterals, no);
    tidyOptSetBool(tdoc, TidyJoinStyles, no);
    tidyOptSetBool(tdoc, TidyAnchorAsName, no);
}