// set option for the tidy object. int lua_tidy_setOpt ( lua_State *L ) { BOOL ok = FALSE; pTidy t = toTidy(L,1); if (lua_type(L,2) == LUA_TSTRING) { char *key = replace(lua_tostring(L,2), "_", "-", NULL); if (lua_type(L,3) == LUA_TBOOLEAN) { ok = tidyOptParseValue(t->tdoc, key, luatotidy_bool(L, 3) ? "1" : "0"); } else { ok = tidyOptParseValue(t->tdoc, key, lua_tostring(L, 3)); } } else { switch ( lua_type(L,3) ) { case LUA_TBOOLEAN: ok = tidyOptSetBool(t->tdoc, (int)lua_tonumber(L,2), luatotidy_bool(L, 3)); break; case LUA_TNUMBER: ok = tidyOptSetInt(t->tdoc, (int) lua_tonumber(L,2), (int) lua_tonumber(L, 3)); break; case LUA_TSTRING: ok = tidyOptSetValue(t->tdoc, (int) lua_tonumber(L,2), lua_tostring(L, 3)); break; } } lua_pushboolean(L, ok>=0); return 1; }
std::string cleanHTML (std::string html) { TidyDoc tidyDoc = tidyCreate(); TidyBuffer tidyOutputBuffer = {0}; // Configure Tidy // The flags tell Tidy to output XML and disable showing warnings bool configSuccess = tidyOptSetBool(tidyDoc, TidyXmlOut, yes) && tidyOptSetBool(tidyDoc, TidyQuiet, yes) && tidyOptSetBool(tidyDoc, TidyNumEntities, yes) && tidyOptSetBool(tidyDoc, TidyShowWarnings, no); tidyOptSetValue(tidyDoc,TidyForceOutput,"true"); int tidyResponseCode = -1; // Parse input if (configSuccess) tidyResponseCode = tidyParseString(tidyDoc, html.c_str()); // Process HTML if (tidyResponseCode >= 0) tidyResponseCode = tidyCleanAndRepair(tidyDoc); // Output the HTML to our buffer if (tidyResponseCode >= 0) tidyResponseCode = tidySaveBuffer(tidyDoc, &tidyOutputBuffer); // Any errors from Tidy? if (tidyResponseCode < 0) throw ("Tidy encountered an error while parsing an HTML response. Tidy response code: " + tidyResponseCode); // Grab the result from the buffer and then free Tidy's memory std::string tidyResult = (char*)tidyOutputBuffer.bp; tidyBufFree(&tidyOutputBuffer); tidyRelease(tidyDoc); return tidyResult; }
/* Set the string-valued Tidy option "ti" on the document held by "htv"
   from the Perl scalar "value". "coption" is the option's name and is
   only used in the warning. An undefined scalar is rejected with a
   warning and html_valid_undefined_option; otherwise the scalar's string
   form is passed to tidyOptSetValue through TIDY_CALL and html_valid_ok
   is returned. */

static html_valid_status_t
html_valid_set_string_option (html_valid_t * htv, const char * coption,
                              TidyOptionId ti, SV * value)
{
    STRLEN text_length;
    const char * text;

    if (SvOK (value)) {
        /* SvPV also reports the length; only the pointer is used. */
        text = SvPV (value, text_length);
        TIDY_CALL (tidyOptSetValue (htv->tdoc, ti, text));
        return html_valid_ok;
    }

    warn ("cannot set option '%s' to undefined value", coption);
    return html_valid_undefined_option;
}
/// Creates a Tidy document, configures it, then parses and repairs @p html.
///
/// The document is set to emit XML in UTF-8 with LF newlines, to leave
/// non-breaking spaces unquoted and to force output. Tidy's messages are
/// routed into m_errorOutput. Return codes of the Tidy calls are not checked.
HtmlTidy::HtmlTidy(const QString& html)
    : m_tidyDoc(tidyCreate())
    , m_errorOutput()
    , m_output()
    , m_input(html)
{
    // Output configuration.
    tidyOptSetBool (m_tidyDoc, TidyXmlOut,       yes);
    tidyOptSetValue(m_tidyDoc, TidyCharEncoding, "utf8");
    tidyOptSetInt  (m_tidyDoc, TidyNewline,      TidyLF);
    tidyOptSetBool (m_tidyDoc, TidyQuoteNbsp,    no);
    tidyOptSetBool (m_tidyDoc, TidyForceOutput,  yes);

    // Collect diagnostics in our own buffer.
    tidySetErrorBuffer(m_tidyDoc, &m_errorOutput);

    // Keep the UTF-8 bytes alive in a named object for the parse call.
    const QByteArray utf8Input = m_input.toUtf8();
    tidyParseString(m_tidyDoc, utf8Input.constData());

    tidyCleanAndRepair(m_tidyDoc);
}
// ------------------------------------------------------------- void Webpage::tidy_me() { try { TidyDoc _tdoc = tidyCreate(); //tidyOptSetBool(_tdoc, tidyOptGetIdForName("show-body-only"), (Bool)1); tidyOptSetBool(_tdoc, tidyOptGetIdForName("output-xhtml"), (Bool)1); tidyOptSetBool(_tdoc, tidyOptGetIdForName("quote-nbsp"), (Bool)0); tidyOptSetBool(_tdoc, tidyOptGetIdForName("show-warnings"), (Bool)0); tidyOptSetValue(_tdoc, tidyOptGetIdForName("char-encoding"), "utf8"); //tidyOptSetBool(_tdoc, tidyOptGetIdForName("ascii-chars"), (Bool)1); //tidyOptSetBool(_tdoc, tidyOptGetIdForName("markup"), (Bool)1); //tidyOptSetValue(_tdoc, tidyOptGetIdForName("indent"), "yes"); //tidyOptSetValue(_tdoc, tidyOptGetIdForName("newline"), "\n"); tidyOptSetInt(_tdoc, tidyOptGetIdForName("wrap"), 5000); tidyParseString( _tdoc, contents.c_str() ); /* // tidySaveBuffer doesn't seem to work with the makefile for some reason. TidyBuffer output = {0}; tidySaveBuffer(_tdoc, &output); cout << "3. TidyBuffer size: " << output.size << endl; contents = string((char*)output.bp, (size_t)output.size); */ // tidySaveString is a tricky beast. tmbstr buffer = NULL; uint buflen = 0; int status; do { status = tidySaveString( _tdoc, buffer, &buflen ); if (status == -ENOMEM) { if(buffer) free(buffer); buffer = (tmbstr)malloc(buflen + 1); } } while (status == -ENOMEM); contents = (char*)buffer; } catch (exception& e) { throw e.what(); } }
void parse_urls(const char *filename, const url_list_t *elem) { TidyDoc tdoc; int err; FILE *outfile = NULL; tdoc = tidyCreate(); tidyOptSetBool(tdoc, TidyForceOutput, yes); tidyOptSetBool(tdoc, TidyMark, no); tidyOptSetBool(tdoc, TidyHideEndTags, yes); tidyOptSetBool(tdoc, TidyDropEmptyParas, no); tidyOptSetBool(tdoc, TidyJoinStyles, no); tidyOptSetBool(tdoc, TidyPreserveEntities, yes); tidyOptSetInt(tdoc, TidyMergeDivs, no); tidyOptSetInt(tdoc, TidyMergeSpans, no); tidyOptSetInt(tdoc, TidyWrapLen, 4096); tidyOptSetValue(tdoc, TidyCharEncoding, "utf8"); tidySetReportFilter(tdoc, filter_cb); err = tidyParseFile(tdoc, filename); if (err >= 0) err = tidyCleanAndRepair(tdoc); if (err >= 0) { outfile = option_values.save_relative_links && !option_values.disable_save_tree ? fopen(filename, "w") : NULL; parse_html(tdoc, tidyGetRoot(tdoc), elem, 1, outfile); if (outfile) fclose(outfile); } tidyRelease(tdoc); }
/*
 * Apply the PHP value `value` to the Tidy configuration option `optname`
 * of `doc`.
 *
 * Unknown and read-only options raise an E_NOTICE and yield FAILURE. For
 * the rest, a private copy of the value is converted to the option's type
 * (string, integer or boolean-as-long) before the matching tidyOptSet*
 * call. Returns SUCCESS when that call reports success, FAILURE otherwise;
 * an unrecognised option type raises an E_WARNING.
 */
static int _php_tidy_set_tidy_opt(TidyDoc doc, char *optname, zval *value)
{
	int status = FAILURE;
	zval conv;
	TidyOption opt = tidyGetOptionByName(doc, optname);

	if (!opt) {
		php_error_docref(NULL, E_NOTICE, "Unknown Tidy Configuration Option '%s'", optname);
		return FAILURE;
	}

	if (tidyOptIsReadOnly(opt)) {
		php_error_docref(NULL, E_NOTICE, "Attempting to set read-only option '%s'", optname);
		return FAILURE;
	}

	/* Work on a shallow copy so the caller's zval is never converted in place. */
	ZVAL_COPY_VALUE(&conv, value);

	switch (tidyOptGetType(opt)) {
		case TidyString:
			if (Z_TYPE(conv) != IS_STRING) {
				zval_copy_ctor(&conv);
				convert_to_string(&conv);
			}
			if (tidyOptSetValue(doc, tidyOptGetId(opt), Z_STRVAL(conv))) {
				status = SUCCESS;
			}
			/* A differing type means a conversion took place above; release its result. */
			if (Z_TYPE(conv) != Z_TYPE_P(value)) {
				zval_dtor(&conv);
			}
			break;

		case TidyInteger:
			if (Z_TYPE(conv) != IS_LONG) {
				zval_copy_ctor(&conv);
				convert_to_long(&conv);
			}
			if (tidyOptSetInt(doc, tidyOptGetId(opt), Z_LVAL(conv))) {
				status = SUCCESS;
			}
			break;

		case TidyBoolean:
			if (Z_TYPE(conv) != IS_LONG) {
				zval_copy_ctor(&conv);
				convert_to_long(&conv);
			}
			if (tidyOptSetBool(doc, tidyOptGetId(opt), Z_LVAL(conv))) {
				status = SUCCESS;
			}
			break;

		default:
			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
			break;
	}

	return status;
}
/*
 * fixup(text [, encoding]) -> (output, errors)
 *
 * Runs `text` through Tidy configured for XHTML output with an XML
 * declaration, numeric entities, no wrapping and no indentation. If
 * `encoding` is given it is used for both input and output; otherwise only
 * the output encoding is set, to utf8.
 *
 * Returns a 2-tuple of strings: the document Tidy saved and the contents of
 * Tidy's error buffer. Returns NULL with IOError set when a Tidy stage
 * reports a negative status, or with the pending Python error when a result
 * string cannot be created.
 */
static PyObject*
elementtidy_fixup(PyObject* self, PyObject* args)
{
    int rc;
    TidyDoc doc;
    TidyBuffer out = {0};
    TidyBuffer err = {0};
    PyObject* pyout;
    PyObject* pyerr;

    char* text;
    char* encoding = NULL;
    if (!PyArg_ParseTuple(args, "s|s:fixup", &text, &encoding))
        return NULL;

    doc = tidyCreate();

    /* options for nice XHTML output */
    if (encoding)
        /* if an encoding is given, use it for both input and output */
        tidyOptSetValue(doc, TidyCharEncoding, encoding);
    else
        /* if no encoding is given, use default input and utf-8 output */
        tidyOptSetValue(doc, TidyOutCharEncoding, "utf8");
    tidyOptSetBool(doc, TidyForceOutput, yes);
    tidyOptSetInt(doc, TidyWrapLen, 0);
    tidyOptSetBool(doc, TidyQuiet, yes);
    tidyOptSetBool(doc, TidyXhtmlOut, yes);
    tidyOptSetBool(doc, TidyXmlDecl, yes);
    tidyOptSetInt(doc, TidyIndentContent, 0);
    tidyOptSetBool(doc, TidyNumEntities, yes);

    rc = tidySetErrorBuffer(doc, &err);
    if (rc < 0) {
        PyErr_SetString(PyExc_IOError, "tidySetErrorBuffer failed");
        goto error;
    }

    rc = tidyParseString(doc, text);
    if (rc < 0) {
        PyErr_SetString(PyExc_IOError, "tidyParseString failed");
        goto error;
    }

    rc = tidyCleanAndRepair(doc);
    if (rc < 0) {
        PyErr_SetString(PyExc_IOError, "tidyCleanAndRepair failed");
        goto error;
    }

    rc = tidyRunDiagnostics(doc);
    if (rc < 0) {
        PyErr_SetString(PyExc_IOError, "tidyRunDiagnostics failed");
        goto error;
    }

    rc = tidySaveBuffer(doc, &out);
    if (rc < 0) {
        /* name the stage that actually failed */
        PyErr_SetString(PyExc_IOError, "tidySaveBuffer failed");
        goto error;
    }

    /* a buffer that was never written to has a NULL bp; map that to "" */
    pyout = PyString_FromString(out.bp ? (const char*) out.bp : "");
    if (!pyout)
        goto error;
    pyerr = PyString_FromString(err.bp ? (const char*) err.bp : "");
    if (!pyerr) {
        Py_DECREF(pyout);
        goto error;
    }

    tidyBufFree(&out);
    tidyBufFree(&err);

    tidyRelease(doc);

    /* "N" hands both references over to the tuple */
    return Py_BuildValue("NN", pyout, pyerr);

  error:
    tidyBufFree(&out);
    tidyBufFree(&err);

    tidyRelease(doc);

    return NULL;
}
void CCFHtmlTidy::SetTidyProp(const std::string& strParam, int nNumValue, const std::string& /*strNumValue*/, const std::string& strTextValue) { TidyDoc formatter = tidyImplToDoc(tidy); if ("axd" == strParam) { tidyOptSetBool(formatter, TidyXmlDecl, yes); } else if ("axs" == strParam) { tidyOptSetBool(formatter, TidyXmlSpace, yes); } else if ("aan" == strParam) { tidyOptSetBool(formatter, TidyAnchorAsName, yes); } else if ("axp" == strParam) { tidyOptSetBool(formatter, TidyXmlPIs, yes); } else if ("b" == strParam) { tidyOptSetBool(formatter, TidyMakeBare, yes); } else if ("c" == strParam) { tidyOptSetBool(formatter, TidyMakeClean, yes); } else if ("diu" == strParam) { tidyOptSetBool(formatter, TidyDecorateInferredUL, yes); } else if ("dep" == strParam) { tidyOptSetBool(formatter, TidyDropEmptyParas, yes); } else if ("dft" == strParam) { tidyOptSetBool(formatter, TidyDropFontTags, yes); } else if ("dpa" == strParam) { tidyOptSetBool(formatter, TidyDropPropAttrs, yes); } else if ("ebt" == strParam) { tidyOptSetBool(formatter, TidyEncloseBlockText, yes); } else if ("et" == strParam) { tidyOptSetBool(formatter, TidyEncloseBodyText, yes); } else if ("ec" == strParam) { tidyOptSetBool(formatter, TidyEscapeCdata, yes); } else if ("fb" == strParam) { tidyOptSetBool(formatter, TidyFixBackslash, yes); } else if ("fbc" == strParam) { tidyOptSetBool(formatter, TidyFixComments, yes); } else if ("fu" == strParam) { tidyOptSetBool(formatter, TidyFixUri, yes); } else if ("hc" == strParam) { tidyOptSetBool(formatter, TidyHideComments, yes); } else if ("he" == strParam) { tidyOptSetBool(formatter, TidyHideEndTags, yes); } else if ("ic" == strParam) { tidyOptSetBool(formatter, TidyIndentCdata, yes); } else if ("ix" == strParam) { tidyOptSetBool(formatter, TidyXmlTags, yes); } else if ("jc" == strParam) { tidyOptSetBool(formatter, TidyJoinClasses, yes); } else if ("js" == strParam) { tidyOptSetBool(formatter, TidyJoinStyles, yes); } else if ("la" == strParam) { 
tidyOptSetBool(formatter, TidyLiteralAttribs, yes); } else if ("le" == strParam) { tidyOptSetBool(formatter, TidyLogicalEmphasis, yes); } else if ("ll" == strParam) { tidyOptSetBool(formatter, TidyLowerLiterals, yes); } else if ("n" == strParam) { tidyOptSetBool(formatter, TidyNCR, yes); } else if ("ne" == strParam) { tidyOptSetBool(formatter, TidyNumEntities, yes); } else if ("oh" == strParam) { tidyOptSetBool(formatter, TidyHtmlOut, yes); } else if ("ox" == strParam) { tidyOptSetBool(formatter, TidyXhtmlOut, yes); } else if ("oxm" == strParam) { tidyOptSetBool(formatter, TidyXmlOut, yes); } else if ("pe" == strParam) { tidyOptSetBool(formatter, TidyPreserveEntities, yes); } else if ("qa" == strParam) { tidyOptSetBool(formatter, TidyQuoteAmpersand, yes); } else if ("qm" == strParam) { tidyOptSetBool(formatter, TidyQuoteMarks, yes); } else if ("qn" == strParam) { tidyOptSetBool(formatter, TidyQuoteNbsp, yes); } else if ("rc" == strParam) { tidyOptSetBool(formatter, TidyReplaceColor, yes); } else if ("ua" == strParam) { tidyOptSetBool(formatter, TidyUpperCaseAttrs, yes); } else if ("ut" == strParam) { tidyOptSetBool(formatter, TidyUpperCaseTags, yes); } else if ("wo" == strParam) { tidyOptSetBool(formatter, TidyWord2000, yes); } else if ("bbb" == strParam) { tidyOptSetBool(formatter, TidyBreakBeforeBR, yes); } else if ("ia" == strParam) { tidyOptSetBool(formatter, TidyIndentAttributes, yes); } else if ("m" == strParam) { tidyOptSetBool(formatter, TidyShowMarkup, yes); } else if ("pw" == strParam) { tidyOptSetBool(formatter, TidyPunctWrap, yes); } else if ("vs" == strParam) { tidyOptSetBool(formatter, TidyVertSpace, yes); } else if ("wa" == strParam) { tidyOptSetBool(formatter, TidyWrapAsp, yes); } else if ("wat" == strParam) { tidyOptSetBool(formatter, TidyWrapAttVals, yes); } else if ("wj" == strParam) { tidyOptSetBool(formatter, TidyWrapJste, yes); } else if ("wp" == strParam) { tidyOptSetBool(formatter, TidyWrapPhp, yes); } else if ("wsl" == strParam) { 
tidyOptSetBool(formatter, TidyWrapScriptlets, yes); } else if ("ws" == strParam) { tidyOptSetBool(formatter, TidyWrapSection, yes); } else if ("ac" == strParam) { tidyOptSetBool(formatter, TidyAsciiChars, yes); } else if ("sw" == strParam) { tidyOptSetBool(formatter, TidyShowWarnings, yes); } else if ("fo" == strParam) { tidyOptSetBool(formatter, TidyForceOutput, yes); } else if ("i" == strParam) { tidyOptSetInt(formatter, TidyIndentContent, abs(nNumValue - 2) % 3); } else if ("md" == strParam) { tidyOptSetInt(formatter, TidyMergeDivs, abs(nNumValue - 2) % 3); } else if ("ms" == strParam) { tidyOptSetInt(formatter, TidyMergeSpans, abs(nNumValue - 2) % 3); } else if ("sbo" == strParam) { tidyOptSetInt(formatter, TidyBodyOnly, abs(nNumValue - 2) % 3); } else if ("d" == strParam) { tidyOptSetInt(formatter, TidyDoctypeMode, nNumValue % 5); } else if ("du" == strParam) { tidyOptSetValue(formatter, TidyDoctype, strTextValue.c_str()); } else if ("ra" == strParam) { tidyOptSetInt(formatter, TidyDuplicateAttrs, nNumValue % 2); } else if ("sa" == strParam) { tidyOptSetInt(formatter, TidySortAttributes, nNumValue % 2); } else if ("ce" == strParam) { tidySetCharEncoding(formatter, TY_(GetEncodingOptNameFromTidyId)(nNumValue)); } else if ("ie" == strParam) { tidySetInCharEncoding(formatter, TY_(GetEncodingOptNameFromTidyId)(nNumValue)); } else if ("oe" == strParam) { tidySetOutCharEncoding(formatter, TY_(GetEncodingOptNameFromTidyId)(nNumValue)); } else if ("se" == strParam) { tidyOptSetInt(formatter, TidyShowErrors, nNumValue); } else if ("is" == strParam) { tidyOptSetInt(formatter, TidyIndentSpaces, nNumValue); } else if ("ts" == strParam) { tidyOptSetInt(formatter, TidyTabSize, nNumValue); } else if ("w" == strParam) { tidyOptSetInt(formatter, TidyWrapLen, nNumValue); } else if ("at" == strParam) { tidyOptSetValue(formatter, TidyAltText, strTextValue.c_str()); } else if ("cp" == strParam) { tidyOptSetValue(formatter, TidyCSSPrefix, strTextValue.c_str()); } else if ("nbt" == 
strParam) { tidyOptSetValue(formatter, TidyBlockTags, strTextValue.c_str()); } else if ("net" == strParam) { tidyOptSetValue(formatter, TidyEmptyTags, strTextValue.c_str()); } else if ("nit" == strParam) { tidyOptSetValue(formatter, TidyInlineTags, strTextValue.c_str()); } else if ("npt" == strParam) { tidyOptSetValue(formatter, TidyPreTags, strTextValue.c_str()); } }
/*
 * Command-line entry point.
 *
 * Loads configuration (compile-time config file, then the file named by
 * the HTML_TIDY environment variable or else the per-user config file),
 * then walks argv: arguments starting with '-' adjust options on the
 * single TidyDoc, every other argument is treated as an input file that is
 * parsed, repaired, diagnosed and (when markup output is enabled) saved.
 * With no file argument the document is read from stdin.
 *
 * Returns 2 if any content errors were counted, 1 if only warnings were
 * counted, 0 otherwise. The help/version style options return 0
 * immediately.
 *
 * errout is not declared in this function; it is assigned here and used
 * for the diagnostics printed below.
 */
int main( int argc, char** argv )
{
    ctmbstr prog = argv[0];
    ctmbstr cfgfil = NULL, errfil = NULL, htmlfil = NULL;
    TidyDoc tdoc = tidyCreate();
    int status = 0;

    /* totals accumulated over all processed input files */
    uint contentErrors = 0;
    uint contentWarnings = 0;
    uint accessWarnings = 0;

    errout = stderr;  /* initialize to stderr */
    status = 0;

#ifdef TIDY_CONFIG_FILE
    if ( tidyFileExists( tdoc, TIDY_CONFIG_FILE) )
    {
        status = tidyLoadConfig( tdoc, TIDY_CONFIG_FILE );
        if ( status != 0 )
            fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", TIDY_CONFIG_FILE, status);
    }
#endif /* TIDY_CONFIG_FILE */

    /* look for env var "HTML_TIDY" */
    /* then for ~/.tidyrc (on platforms defining $HOME) */

    if ( (cfgfil = getenv("HTML_TIDY")) != NULL )
    {
        status = tidyLoadConfig( tdoc, cfgfil );
        if ( status != 0 )
            fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", cfgfil, status);
    }
#ifdef TIDY_USER_CONFIG_FILE
    else if ( tidyFileExists( tdoc, TIDY_USER_CONFIG_FILE) )
    {
        status = tidyLoadConfig( tdoc, TIDY_USER_CONFIG_FILE );
        if ( status != 0 )
            fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", TIDY_USER_CONFIG_FILE, status);
    }
#endif /* TIDY_USER_CONFIG_FILE */

    /* read command line */
    /* argv[1] is always the argument under inspection; consuming an
       argument means --argc; ++argv */
    while ( argc > 0 )
    {
        if (argc > 1 && argv[1][0] == '-')
        {
            /* support -foo and --foo */
            /* arg skips only the first '-', so "--foo" is seen as "-foo";
               that is why several names below are also compared with a
               leading '-' */
            ctmbstr arg = argv[1] + 1;

            if ( strcasecmp(arg, "xml") == 0)
                tidyOptSetBool( tdoc, TidyXmlTags, yes );

            else if ( strcasecmp(arg, "asxml") == 0 ||
                      strcasecmp(arg, "asxhtml") == 0 )
            {
                tidyOptSetBool( tdoc, TidyXhtmlOut, yes );
            }
            else if ( strcasecmp(arg, "ashtml") == 0 )
                tidyOptSetBool( tdoc, TidyHtmlOut, yes );

            else if ( strcasecmp(arg, "indent") == 0 )
            {
                tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
                /* an indent width of 0 is reset to the option's default */
                if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
                    tidyOptResetToDefault( tdoc, TidyIndentSpaces );
            }
            else if ( strcasecmp(arg, "omit") == 0 )
                tidyOptSetBool( tdoc, TidyHideEndTags, yes );

            else if ( strcasecmp(arg, "upper") == 0 )
                tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );

            else if ( strcasecmp(arg, "clean") == 0 )
                tidyOptSetBool( tdoc, TidyMakeClean, yes );

            else if ( strcasecmp(arg, "bare") == 0 )
                tidyOptSetBool( tdoc, TidyMakeBare, yes );

            /* an encoding name used directly as an option */
            else if ( strcasecmp(arg, "raw") == 0      ||
                      strcasecmp(arg, "ascii") == 0    ||
                      strcasecmp(arg, "latin0") == 0   ||
                      strcasecmp(arg, "latin1") == 0   ||
                      strcasecmp(arg, "utf8") == 0     ||
#ifndef NO_NATIVE_ISO2022_SUPPORT
                      strcasecmp(arg, "iso2022") == 0  ||
#endif
#if SUPPORT_UTF16_ENCODINGS
                      strcasecmp(arg, "utf16le") == 0  ||
                      strcasecmp(arg, "utf16be") == 0  ||
                      strcasecmp(arg, "utf16") == 0    ||
#endif
#if SUPPORT_ASIAN_ENCODINGS
                      strcasecmp(arg, "shiftjis") == 0 ||
                      strcasecmp(arg, "big5") == 0     ||
#endif
                      strcasecmp(arg, "mac") == 0      ||
                      strcasecmp(arg, "win1252") == 0  ||
                      strcasecmp(arg, "ibm858") == 0 )
            {
                tidySetCharEncoding( tdoc, arg );
            }
            else if ( strcasecmp(arg, "numeric") == 0 )
                tidyOptSetBool( tdoc, TidyNumEntities, yes );

            else if ( strcasecmp(arg, "modify") == 0 ||
                      strcasecmp(arg, "change") == 0 ||  /* obsolete */
                      strcasecmp(arg, "update") == 0 )   /* obsolete */
            {
                tidyOptSetBool( tdoc, TidyWriteBack, yes );
            }
            else if ( strcasecmp(arg, "errors") == 0 )
                tidyOptSetBool( tdoc, TidyShowMarkup, no );

            else if ( strcasecmp(arg, "quiet") == 0 )
                tidyOptSetBool( tdoc, TidyQuiet, yes );

            /* informational options: print and exit without processing */
            else if ( strcasecmp(arg, "help") == 0 ||
                      strcasecmp(arg, "h") == 0 || *arg == '?' )
            {
                help( prog );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "xml-help") == 0)
            {
                xml_help( );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "help-config") == 0 )
            {
                optionhelp( tdoc );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "xml-config") == 0 )
            {
                XMLoptionhelp( tdoc );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "show-config") == 0 )
            {
                optionvalues( tdoc );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            /* options taking a value in argv[2]; silently ignored when no
               further argument is present */
            else if ( strcasecmp(arg, "config") == 0 )
            {
                if ( argc >= 3 )
                {
                    ctmbstr post;

                    tidyLoadConfig( tdoc, argv[2] );

                    /* Set new error output stream if setting changed */
                    post = tidyOptGetValue( tdoc, TidyErrFile );
                    if ( post && (!errfil || !samefile(errfil, post)) )
                    {
                        errfil = post;
                        errout = tidySetErrorFile( tdoc, post );
                    }

                    --argc;
                    ++argv;
                }
            }

#if SUPPORT_ASIAN_ENCODINGS
            else if ( strcasecmp(arg, "language") == 0 ||
                      strcasecmp(arg,     "lang") == 0 )
            {
                if ( argc >= 3 )
                {
                    tidyOptSetValue( tdoc, TidyLanguage, argv[2] );
                    --argc;
                    ++argv;
                }
            }
#endif

            else if ( strcasecmp(arg, "output") == 0 ||
                      strcasecmp(arg, "-output-file") == 0 ||
                      strcasecmp(arg, "o") == 0 )
            {
                if ( argc >= 3 )
                {
                    tidyOptSetValue( tdoc, TidyOutFile, argv[2] );
                    --argc;
                    ++argv;
                }
            }
            else if ( strcasecmp(arg,  "file") == 0 ||
                      strcasecmp(arg, "-file") == 0 ||
                      strcasecmp(arg,     "f") == 0 )
            {
                if ( argc >= 3 )
                {
                    errfil = argv[2];
                    errout = tidySetErrorFile( tdoc, errfil );
                    --argc;
                    ++argv;
                }
            }
            else if ( strcasecmp(arg,  "wrap") == 0 ||
                      strcasecmp(arg, "-wrap") == 0 ||
                      strcasecmp(arg,     "w") == 0 )
            {
                if ( argc >= 3 )
                {
                    uint wraplen = 0;
                    int nfields = sscanf( argv[2], "%u", &wraplen );
                    tidyOptSetInt( tdoc, TidyWrapLen, wraplen );
                    /* the next argument is consumed only if it was a number;
                       otherwise the wrap length has been set to 0 */
                    if (nfields > 0)
                    {
                        --argc;
                        ++argv;
                    }
                }
            }
            else if ( strcasecmp(arg,  "version") == 0 ||
                      strcasecmp(arg, "-version") == 0 ||
                      strcasecmp(arg,        "v") == 0 )
            {
                version();
                tidyRelease( tdoc );
                return 0;  /* success */

            }
            /* generic "--name value" configuration option */
            /* NOTE(review): argv[2] is passed without an argc check here,
               unlike the branches above -- confirm this is intended when
               "--name" is the last argument */
            else if ( strncmp(argv[1], "--", 2 ) == 0)
            {
                if ( tidyOptParseValue(tdoc, argv[1]+2, argv[2]) )
                {
                    /* Set new error output stream if setting changed */
                    ctmbstr post = tidyOptGetValue( tdoc, TidyErrFile );
                    if ( post && (!errfil || !samefile(errfil, post)) )
                    {
                        errfil = post;
                        errout = tidySetErrorFile( tdoc, post );
                    }

                    ++argv;
                    --argc;
                }
            }

#if SUPPORT_ACCESSIBILITY_CHECKS
            else if ( strcasecmp(arg, "access") == 0 )
            {
                if ( argc >= 3 )
                {
                    uint acclvl = 0;
                    int nfields = sscanf( argv[2], "%u", &acclvl );
                    tidyOptSetInt( tdoc, TidyAccessibilityCheckLevel, acclvl );
                    if (nfields > 0)
                    {
                        --argc;
                        ++argv;
                    }
                }
            }
#endif

            /* anything else: treat every character after the '-' as a
               single-letter flag */
            else
            {
                uint c;
                ctmbstr s = argv[1];

                while ( (c = *++s) != '\0' )
                {
                    switch ( c )
                    {
                    case 'i':
                        tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
                        if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
                            tidyOptResetToDefault( tdoc, TidyIndentSpaces );
                        break;

                    /* Usurp -o for output file.  Anyone hiding end tags?
                    case 'o':
                        tidyOptSetBool( tdoc, TidyHideEndTags, yes );
                        break;
                    */

                    case 'u':
                        tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
                        break;

                    case 'c':
                        tidyOptSetBool( tdoc, TidyMakeClean, yes );
                        break;

                    case 'b':
                        tidyOptSetBool( tdoc, TidyMakeBare, yes );
                        break;

                    case 'n':
                        tidyOptSetBool( tdoc, TidyNumEntities, yes );
                        break;

                    case 'm':
                        tidyOptSetBool( tdoc, TidyWriteBack, yes );
                        break;

                    case 'e':
                        tidyOptSetBool( tdoc, TidyShowMarkup, no );
                        break;

                    case 'q':
                        tidyOptSetBool( tdoc, TidyQuiet, yes );
                        break;

                    default:
                        unknownOption( c );
                        break;
                    }
                }
            }

            /* consume the option itself and look at the next argument */
            --argc;
            ++argv;
            continue;
        }

        /* not an option: argv[1], if present, names an input file */
        if ( argc > 1 )
        {
            htmlfil = argv[1];
            if ( tidyOptGetBool(tdoc, TidyEmacs) )
                tidyOptSetValue( tdoc, TidyEmacsFile, htmlfil );
            status = tidyParseFile( tdoc, htmlfil );
        }
        else
        {
            htmlfil = "stdin";
            status = tidyParseStdin( tdoc );
        }

        /* each stage runs only if the previous one did not fail */
        if ( status >= 0 )
            status = tidyCleanAndRepair( tdoc );

        if ( status >= 0 )
            status = tidyRunDiagnostics( tdoc );

        if ( status > 1 ) /* If errors, do we want to force output? */
            status = ( tidyOptGetBool(tdoc, TidyForceOutput) ? status : -1 );

        if ( status >= 0 && tidyOptGetBool(tdoc, TidyShowMarkup) )
        {
            /* write back over the input file, else to the configured
               output file, else to stdout */
            if ( tidyOptGetBool(tdoc, TidyWriteBack) && argc > 1 )
                status = tidySaveFile( tdoc, htmlfil );
            else
            {
                ctmbstr outfil = tidyOptGetValue( tdoc, TidyOutFile );
                if ( outfil )
                    status = tidySaveFile( tdoc, outfil );
                else
                    status = tidySaveStdout( tdoc );
            }
        }

        contentErrors   += tidyErrorCount( tdoc );
        contentWarnings += tidyWarningCount( tdoc );
        accessWarnings  += tidyAccessWarningCount( tdoc );

        --argc;
        ++argv;

        /* stop once no further input file remains */
        if ( argc <= 1 )
            break;
    }

    if (!tidyOptGetBool(tdoc, TidyQuiet) &&
        errout == stderr && !contentErrors)
        fprintf(errout, "\n");

    if (contentErrors + contentWarnings > 0 &&
         !tidyOptGetBool(tdoc, TidyQuiet))
        tidyErrorSummary(tdoc);

    if (!tidyOptGetBool(tdoc, TidyQuiet))
        tidyGeneralInfo(tdoc);

    /* called to free hash tables etc. */
    tidyRelease( tdoc );

    /* return status can be used by scripts */
    if ( contentErrors > 0 )
        return 2;

    if ( contentWarnings > 0 )
        return 1;

    /* 0 signifies all is ok */
    return 0;
}
void FetchTaskHandler(const FetchTaskMessage &message, const Theron::Address from) { std::cout<<"get data.................."<<std::endl; std::string url=message.fi->url; char curl_errbuf[CURL_ERROR_SIZE]; CURL *curl = curl_easy_init(); int err; fetch::FetchResult *result=new fetch::FetchResult(); fetch::FetchInfo fi=*(message.fi); delete message.fi; result->type=fetch::UNKNOWN; result->url=fi.url; result->pathList=fi.pathList; result->attMap=fi.attMap; int errCode=0; if(curl!=NULL) { curl_easy_setopt(curl, CURLOPT_URL,url.c_str()); curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf); // curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); // curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10); curl_easy_setopt(curl, CURLOPT_NOSIGNAL,1); curl_easy_setopt(curl, CURLOPT_TIMEOUT ,60); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fetch_write); std::stringstream iss; curl_easy_setopt(curl, CURLOPT_WRITEDATA, &iss); err=curl_easy_perform(curl); if ( !err ) { std::map<std::string,std::string>::iterator efit=fi.attMap.find("encode"); char *resStr=new char[iss.str().length()*3]; memset(resStr,0,iss.str().length()*3); if(efit!=fi.attMap.end()) { UErrorCode error = U_ZERO_ERROR; ucnv_convert("UTF-8",efit->second.c_str(),resStr, iss.str().length()*3, iss.str().c_str(), iss.str().length(), &error ); }else { strcpy(resStr,iss.str().c_str()); } TidyDoc tdoc; // TidyBuffer tidy_errbuf = {0}; // TidyBuffer docbuf = {0}; tdoc = tidyCreate(); tidyOptSetInt(tdoc, TidyWrapLen, 4096); // tidySetErrorBuffer( tdoc, &tidy_errbuf ); tidyOptSetBool( tdoc, TidyXmlOut, yes ); tidyOptSetBool(tdoc, TidyQuoteNbsp, no); tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */ // tidyOptSetBool( tdoc, TidyXmlDecl, yes ); tidyOptSetBool(tdoc, TidyQuiet, yes); tidyOptSetBool(tdoc, TidyShowWarnings, no); tidyOptSetValue(tdoc,TidyDoctype,"omit"); tidyOptSetBool(tdoc, TidyFixBackslash, yes); tidyOptSetBool(tdoc, TidyMark, no); 
tidySetCharEncoding(tdoc,"utf8"); // tidyBufInit(&docbuf); // err = tidyParseBuffer(tdoc, &docbuf); err = tidyParseString(tdoc, resStr); if ( err >= 0 ) { err = tidyCleanAndRepair(tdoc); /* fix any problems */ if ( err >= 0 ) { // err=tidyRunDiagnostics(tdoc); /* load tidy error buffer */ // if ( err >= 0 ) { // std::cout<<"tidy error:"<<tidy_errbuf.bp<<std::endl; /* show errors */ TidyBuffer outbuf = {0}; tidyBufInit(&outbuf); tidySaveBuffer( tdoc, &outbuf ); std::stringstream hss; hss<<(char*)outbuf.bp; tidyBufFree(&outbuf); result->result=hss.str(); // } // else // { // errCode=-5; // } }else { errCode=-4; } }else { errCode=-3; } // tidyBufFree(&tidy_errbuf); // tidyBufFree(&docbuf); tidyRelease(tdoc); delete [] resStr; }else { errCode=-2; } }else { errCode=-1; } if(errCode<0) { std::stringstream ess; ess<<errCode; result->type=fetch::ERROR; result->result=ess.str(); } curl_easy_cleanup(curl); Send(FetchResultMessage(result), from); }
// Applies one "-<letters><digits>" token of a settings string to tdoc.
//
// tdoc      Tidy document to configure.
// lpszTidy  settings string; the token starts at lpszTidy[nPos].
// nPos      index of the token's leading '-'.
// nSize     token length including the '-'.
//
// The letters select the option; the trailing digits (0 when absent) are
// its numeric argument. Tokens that do not start with '-', are shorter
// than two characters, or name an unknown option are ignored.
void HTidyInterface::SetTidyControl( TidyDoc tdoc, LPCTSTR lpszTidy, int nPos, int nSize )
{
	if (lpszTidy[nPos] != '-' || nSize < 2)
	{
		return;
	}

	// Offset (relative to nPos) of the first non-letter after the '-';
	// equals nSize when the token consists of letters only.
	int nSplit = nSize;
	for (int nOffset = 1; nOffset < nSize; ++nOffset)
	{
		if (!_istalpha(lpszTidy[nPos + nOffset]))
		{
			nSplit = nOffset;
			break;
		}
	}

	const CString strParam(lpszTidy + nPos + 1, nSplit - 1);

	int nNumValue = 0;
	if (nSplit != nSize)
	{
		CString strDigits(lpszTidy + nPos + nSplit, nSize - nSplit);
		nNumValue = _ttoi(strDigits);
	}

	// Formatted but not referenced again below.
	CString strNumValue;
	strNumValue.Format(_T("%d"), nNumValue);

	// Empty text passed for string options when not in HTML mode.
	CString strNothing;

	struct CodeToOption
	{
		LPCTSTR      code;
		TidyOptionId id;
	};
	struct CodeToModulo
	{
		LPCTSTR      code;
		TidyOptionId id;
		int          modulus;
	};

	// Codes that switch a boolean option on.
	static const CodeToOption kSwitches[] =
	{
		{ _T("axd"), TidyXmlDecl },            { _T("axs"), TidyXmlSpace },
		{ _T("aan"), TidyAnchorAsName },       { _T("axp"), TidyXmlPIs },
		{ _T("b"),   TidyMakeBare },           { _T("c"),   TidyMakeClean },
		{ _T("diu"), TidyDecorateInferredUL }, { _T("dep"), TidyDropEmptyParas },
		{ _T("dft"), TidyDropFontTags },       { _T("dpa"), TidyDropPropAttrs },
		{ _T("ebt"), TidyEncloseBlockText },   { _T("et"),  TidyEncloseBodyText },
		{ _T("ec"),  TidyEscapeCdata },        { _T("fb"),  TidyFixBackslash },
		{ _T("fbc"), TidyFixComments },        { _T("fu"),  TidyFixUri },
		{ _T("hc"),  TidyHideComments },       { _T("he"),  TidyHideEndTags },
		{ _T("ic"),  TidyIndentCdata },        { _T("ix"),  TidyXmlTags },
		{ _T("jc"),  TidyJoinClasses },        { _T("js"),  TidyJoinStyles },
		{ _T("la"),  TidyLiteralAttribs },     { _T("le"),  TidyLogicalEmphasis },
		{ _T("ll"),  TidyLowerLiterals },      { _T("n"),   TidyNCR },
		{ _T("ne"),  TidyNumEntities },        { _T("oh"),  TidyHtmlOut },
		{ _T("ox"),  TidyXhtmlOut },           { _T("oxm"), TidyXmlOut },
		{ _T("pe"),  TidyPreserveEntities },   { _T("qa"),  TidyQuoteAmpersand },
		{ _T("qm"),  TidyQuoteMarks },         { _T("qn"),  TidyQuoteNbsp },
		{ _T("rc"),  TidyReplaceColor },       { _T("ua"),  TidyUpperCaseAttrs },
		{ _T("ut"),  TidyUpperCaseTags },      { _T("wo"),  TidyWord2000 },
		{ _T("bbb"), TidyBreakBeforeBR },      { _T("ia"),  TidyIndentAttributes },
		{ _T("m"),   TidyShowMarkup },         { _T("pw"),  TidyPunctWrap },
		{ _T("vs"),  TidyVertSpace },          { _T("wa"),  TidyWrapAsp },
		{ _T("wat"), TidyWrapAttVals },        { _T("wj"),  TidyWrapJste },
		{ _T("wp"),  TidyWrapPhp },            { _T("wsl"), TidyWrapScriptlets },
		{ _T("ws"),  TidyWrapSection },        { _T("ac"),  TidyAsciiChars },
		{ _T("sw"),  TidyShowWarnings },       { _T("fo"),  TidyForceOutput },
	};

	// Integer options stored as abs(nNumValue - 2) % 3.
	static const CodeToOption kTriStates[] =
	{
		{ _T("i"),   TidyIndentContent },
		{ _T("md"),  TidyMergeDivs },
		{ _T("ms"),  TidyMergeSpans },
		{ _T("sbo"), TidyBodyOnly },
	};

	// Integer options stored as nNumValue % modulus.
	static const CodeToModulo kWrapped[] =
	{
		{ _T("d"),  TidyDoctypeMode,    5 },
		{ _T("ra"), TidyDuplicateAttrs, 2 },
		{ _T("sa"), TidySortAttributes, 2 },
	};

	// Integer options that take nNumValue unchanged.
	static const CodeToOption kNumbers[] =
	{
		{ _T("se"), TidyShowErrors },
		{ _T("is"), TidyIndentSpaces },
		{ _T("ts"), TidyTabSize },
		{ _T("w"),  TidyWrapLen },
	};

	unsigned idx;

	for (idx = 0; idx < sizeof(kSwitches) / sizeof(kSwitches[0]); ++idx)
	{
		if (strParam == kSwitches[idx].code)
		{
			tidyOptSetBool(tdoc, kSwitches[idx].id, yes);
			return;
		}
	}

	for (idx = 0; idx < sizeof(kTriStates) / sizeof(kTriStates[0]); ++idx)
	{
		if (strParam == kTriStates[idx].code)
		{
			tidyOptSetInt(tdoc, kTriStates[idx].id, abs(nNumValue - 2) % 3);
			return;
		}
	}

	for (idx = 0; idx < sizeof(kWrapped) / sizeof(kWrapped[0]); ++idx)
	{
		if (strParam == kWrapped[idx].code)
		{
			tidyOptSetInt(tdoc, kWrapped[idx].id, nNumValue % kWrapped[idx].modulus);
			return;
		}
	}

	for (idx = 0; idx < sizeof(kNumbers) / sizeof(kNumbers[0]); ++idx)
	{
		if (strParam == kNumbers[idx].code)
		{
			tidyOptSetInt(tdoc, kNumbers[idx].id, nNumValue);
			return;
		}
	}

	// Character encodings: nNumValue is an index resolved by GetEncodeByIndex.
	if (strParam == _T("ce"))
	{
		tidySetCharEncoding(tdoc, GetEncodeByIndex(nNumValue));
	}
	else if (strParam == _T("ie"))
	{
		tidySetInCharEncoding(tdoc, GetEncodeByIndex(nNumValue));
	}
	else if (strParam == _T("oe"))
	{
		tidySetOutCharEncoding(tdoc, GetEncodeByIndex(nNumValue));
	}
	// String options: the text comes from g_GlobalTidy in HTML mode and is
	// empty otherwise.
	else if (strParam == _T("at"))
	{
		tidyOptSetValue(tdoc, TidyAltText, CT2A(m_onlyHtmlXml == SYN_HTML?g_GlobalTidy.m_TidyHtml_at:strNothing));
	}
	else if (strParam == _T("cp"))
	{
		tidyOptSetValue(tdoc, TidyCSSPrefix, CT2A(m_onlyHtmlXml == SYN_HTML?g_GlobalTidy.m_TidyHtml_cp:strNothing));
	}
	else if (strParam == _T("nbt"))
	{
		tidyOptSetValue(tdoc, TidyBlockTags, CT2A(m_onlyHtmlXml == SYN_HTML?g_GlobalTidy.m_TidyHtml_nbt:strNothing));
	}
	else if (strParam == _T("net"))
	{
		tidyOptSetValue(tdoc, TidyEmptyTags, CT2A(m_onlyHtmlXml == SYN_HTML?g_GlobalTidy.m_TidyHtml_net:strNothing));
	}
	else if (strParam == _T("nit"))
	{
		tidyOptSetValue(tdoc, TidyInlineTags, CT2A(m_onlyHtmlXml == SYN_HTML?g_GlobalTidy.m_TidyHtml_nit:strNothing));
	}
	else if (strParam == _T("npt"))
	{
		tidyOptSetValue(tdoc, TidyPreTags, CT2A(m_onlyHtmlXml == SYN_HTML?g_GlobalTidy.m_TidyHtml_npt:strNothing));
	}
}