/*
 * The entry point: parse htmltext with Tidy and convert the resulting
 * Tidy tree into edbrowse nodes.
 *
 * htmltext  - the HTML source to parse.
 * startpage - when false, Tidy is configured with TidyBodyOnly.
 *
 * The Tidy document is stored in the file-level variable tdoc for the
 * duration of the call and released before returning.
 */
void html2nodes(const char *htmltext, bool startpage)
{
	char *htmlfix = 0;
	const char *parsetext;

	tdoc = tidyCreate();
	if (!startpage)
		tidyOptSetInt(tdoc, TidyBodyOnly, yes);
	tidySetReportFilter(tdoc, tidyErrorHandler);
	/* tidySetReportFilter(tdoc, tidyReportFilter); */
	tidySetCharEncoding(tdoc, (cons_utf8 ? "utf8" : "latin1"));

	/* Parse the preprocessed text when tidyPreprocess() returned one,
	 * otherwise parse the caller's text as it is. */
	htmlfix = tidyPreprocess(htmltext);
	parsetext = htmlfix ? htmlfix : htmltext;
	tidyParseString(tdoc, parsetext);
	if (htmlfix)
		nzFree(htmlfix);

	tidyCleanAndRepair(tdoc);

	/* At debug level 5 and above, print the tidy tree first. */
	if (debugLevel >= 5) {
		traverse_tidycall = printNode;
		traverseTidy();
	}

	/* convert tidy nodes into edbrowse nodes */
	traverse_tidycall = convertNode;
	traverseTidy();

	tidyRelease(tdoc);
}				/* html2nodes */
// Destructor: deletes the owned `nam` object and, unless this reply carries a
// redirection target, frees the Tidy buffers and releases the Tidy document.
TidyNetworkReply::~TidyNetworkReply()
{
    delete nam;

    // When a redirection target attribute is set, the Tidy resources are left
    // untouched by this destructor.
    const bool hasRedirectTarget =
        this->attribute(QNetworkRequest::RedirectionTargetAttribute).isValid();
    if (hasRedirectTarget)
        return;

    tidyBufFree(&output);
    tidyBufFree(&errbuf);
    tidyRelease(tdoc);
}
/*
 * Release the Tidy document held by htv, clear the stored handle and
 * decrement the n_mallocs counter. Always returns html_valid_ok.
 */
static html_valid_status_t
html_valid_destroy (html_valid_t * htv)
{
    tidyRelease (htv->tdoc);
    /* Drop the stale handle so it cannot be reused after release. */
    htv->tdoc = 0;
    htv->n_mallocs -= 1;

    return html_valid_ok;
}
QString tidyHtml(QString str, bool& ok) { #ifdef NO_TIDY ok = true; return str; #else QString res = str; ok = false; static bool isTidyWithIntBodyOnly = isTidyWithIntBodyOnlyCheck(); TidyDoc tdoc = tidyCreate(); TidyBuffer output; TidyBuffer errbuf; tidyBufInit(&output); tidyBufInit(&errbuf); bool configOk = tidyOptSetBool(tdoc, TidyXhtmlOut, yes) && tidyOptSetBool(tdoc, TidyForceOutput, yes) && tidyOptSetBool(tdoc, TidyMark, no) && (isTidyWithIntBodyOnly ? tidyOptSetInt(tdoc, TidyBodyOnly, 1) : tidyOptSetBool(tdoc, TidyBodyOnly, yes)) && tidyOptSetInt(tdoc, TidyWrapLen, 0) && tidyOptSetInt(tdoc, TidyDoctypeMode, TidyDoctypeOmit); if (configOk && (tidySetCharEncoding(tdoc, "utf8") >= 0) && (tidySetErrorBuffer(tdoc, &errbuf) >= 0) && (tidyParseString(tdoc, str.toUtf8().data()) >= 0) && (tidyCleanAndRepair(tdoc) >= 0) && (tidyRunDiagnostics(tdoc) >= 0) && (tidySaveBuffer(tdoc, &output) >= 0) && (output.bp != 0 && output.size > 0)) { res = QString::fromUtf8((char*)output.bp, output.size); ok = true; } #ifdef DEBUG_MARKUP if (errbuf.size > 0) { QString errStr = QString::fromUtf8((char*)errbuf.bp, errbuf.size); qDebug() << "\n[DEBUG] MARKUP, libtidy errors and warnings:\n" << errStr; } #endif if (output.bp != 0) tidyBufFree(&output); if (errbuf.bp != 0) tidyBufFree(&errbuf); tidyRelease(tdoc); return res.trimmed(); #endif }
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { TidyDoc tdoc = tidyCreate(); // At the time this fuzzer was written, the configuration parser could // only be exercised via a file interface. char* tmpfile = fuzzer_get_tmpfile(data, size); tidyLoadConfig(tdoc, tmpfile); fuzzer_release_tmpfile(tmpfile); tidyRelease(tdoc); return 0; }
// garbage collection, make sure we mark all opened nodes as expired. int lua_tidy_gc ( lua_State *L ) { pTidy t = toTidy(L,1); doc_clear_all_nodes(t); // all nodes are marked as expired. tidyRelease( t->tdoc ); tidyBufFree( &t->errbuf ); t->tdoc = NULL; return 0; }
int main(int argc, char **argv ) { CURL *curl; char curl_errbuf[CURL_ERROR_SIZE]; TidyDoc tdoc; TidyBuffer docbuf = {0}; TidyBuffer tidy_errbuf = {0}; int err; if ( argc == 2) { curl = curl_easy_init(); curl_easy_setopt(curl, CURLOPT_URL, argv[1]); curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf); curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb); tdoc = tidyCreate(); tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */ tidyOptSetInt(tdoc, TidyWrapLen, 4096); tidySetErrorBuffer( tdoc, &tidy_errbuf ); tidyBufInit(&docbuf); curl_easy_setopt(curl, CURLOPT_WRITEDATA, &docbuf); err=curl_easy_perform(curl); if ( !err ) { err = tidyParseBuffer(tdoc, &docbuf); /* parse the input */ if ( err >= 0 ) { err = tidyCleanAndRepair(tdoc); /* fix any problems */ if ( err >= 0 ) { err = tidyRunDiagnostics(tdoc); /* load tidy error buffer */ if ( err >= 0 ) { dumpNode( tdoc, tidyGetRoot(tdoc), 0 ); /* walk the tree */ fprintf(stderr, "%s\n", tidy_errbuf.bp); /* show errors */ } } } } else fprintf(stderr, "%s\n", curl_errbuf); /* clean-up */ curl_easy_cleanup(curl); tidyBufFree(&docbuf); tidyBufFree(&tidy_errbuf); tidyRelease(tdoc); return(err); } else printf( "usage: %s <url>\n", argv[0] ); return(0); }
std::string cleanHTML (std::string html) { TidyDoc tidyDoc = tidyCreate(); TidyBuffer tidyOutputBuffer = {0}; // Configure Tidy // The flags tell Tidy to output XML and disable showing warnings bool configSuccess = tidyOptSetBool(tidyDoc, TidyXmlOut, yes) && tidyOptSetBool(tidyDoc, TidyQuiet, yes) && tidyOptSetBool(tidyDoc, TidyNumEntities, yes) && tidyOptSetBool(tidyDoc, TidyShowWarnings, no); tidyOptSetValue(tidyDoc,TidyForceOutput,"true"); int tidyResponseCode = -1; // Parse input if (configSuccess) tidyResponseCode = tidyParseString(tidyDoc, html.c_str()); // Process HTML if (tidyResponseCode >= 0) tidyResponseCode = tidyCleanAndRepair(tidyDoc); // Output the HTML to our buffer if (tidyResponseCode >= 0) tidyResponseCode = tidySaveBuffer(tidyDoc, &tidyOutputBuffer); // Any errors from Tidy? if (tidyResponseCode < 0) throw ("Tidy encountered an error while parsing an HTML response. Tidy response code: " + tidyResponseCode); // Grab the result from the buffer and then free Tidy's memory std::string tidyResult = (char*)tidyOutputBuffer.bp; tidyBufFree(&tidyOutputBuffer); tidyRelease(tidyDoc); return tidyResult; }
void TidyReader::closeFile (void) { if (m_tidy) { tidyBufFree (&m_outbuf); #ifndef DEBUG tidyBufFree (&m_errbuf); #endif tidyRelease (m_tidy); m_tidy = 0; } memset (&m_outbuf, 0 , sizeof (TidyBuffer)); memset (&m_errbuf, 0 , sizeof (TidyBuffer)); }
/*
 * Free-storage handler for tidy objects.
 *
 * Runs the standard object destructor, then drops this object's reference
 * on its PHPTidyDoc (if it has one). When the reference count reaches zero
 * the error buffer, the Tidy document and the PHPTidyDoc itself are freed.
 */
static void tidy_object_free_storage(zend_object *object)
{
	PHPTidyObj *intern = php_tidy_fetch_object(object);

	zend_object_std_dtor(&intern->std);

	/* Objects without an attached document have nothing more to free. */
	if (!intern->ptdoc) {
		return;
	}

	/* Other objects still reference the document: keep it alive. */
	if (--intern->ptdoc->ref_count > 0) {
		return;
	}

	tidyBufFree(intern->ptdoc->errbuf);
	efree(intern->ptdoc->errbuf);
	tidyRelease(intern->ptdoc->doc);
	efree(intern->ptdoc);
}
bool CCFHtmlTidy::TidyMain(const char* pSourceIn, const char* pOptions, std::string &strOut, std::string &strErr) { TidyBuffer output; TidyBuffer errbuf; int rc = -1; Bool ok = yes; TidyDoc tdoc = tidyCreate(); // Initialize "document" tidyBufInit(&output); tidyBufInit(&errbuf); TidyOptionsSet(tidyDocToImpl(tdoc), pOptions); if (ok) rc = tidySetErrorBuffer(tdoc, &errbuf); // Capture diagnostics if (rc >= 0) rc = tidyParseString(tdoc, pSourceIn); // Parse the input if (rc >= 0) rc = tidyCleanAndRepair(tdoc); // Tidy it up! if (rc >= 0) rc = tidyRunDiagnostics(tdoc); // Kvetch //if ( rc > 1 ) // If error, force output. // rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 ); if (rc >= 0) rc = tidySaveBuffer(tdoc, &output); // Pretty Print if (rc >= 0) { if (output.bp) { strOut = reinterpret_cast<char const*>(output.bp); } } strErr = reinterpret_cast<char const*>(errbuf.bp); std::string strEmpty = "No warnings or errors were found.\n\n"; if (0 == strEmpty.compare(strErr)) { strErr.clear(); } tidyBufFree(&output); tidyBufFree(&errbuf); tidyRelease(tdoc); return true; }
// Takes HTML code and returns it converted to XHTML code.
//
// Returns an empty string when Tidy reports a severe error (the status code
// is printed to stdout) or produces no output.
//
// The following code is based on the libtidy sample
// (c) Charles Reitzel and Dave Raggett, see the package tidy.
QString tidy(QString input)
{
    TidyBuffer output = {0};
    TidyBuffer errbuf = {0};
    QString result;
    int rc = -1;

    TidyDoc tdoc = tidyCreate();                                    // Initialize "document"
    Bool ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes );            // Convert to XHTML
    if ( ok )
        rc = tidySetErrorBuffer( tdoc, &errbuf );                   // Capture diagnostics
    tidySetCharEncoding( tdoc, "utf8" );
    if ( rc >= 0 )
        rc = tidyParseString( tdoc, input.toUtf8().constData() );   // Parse the input
    if ( rc >= 0 )
        rc = tidyCleanAndRepair( tdoc );                            // Tidy it up!
    if ( rc >= 0 )
        rc = tidyRunDiagnostics( tdoc );                            // Kvetch
    if ( rc > 1 )                                                   // If error, force output.
        rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
    if ( rc >= 0 )
        rc = tidySaveBuffer( tdoc, &output );                       // Pretty Print

    if ( rc >= 0 )
    {
        // Decode straight from Tidy's buffer. The previous hand-rolled copy
        // leaked its malloc'ed buffer, lost the final character of the
        // output, and dereferenced output.bp even when it was NULL.
        if ( output.bp )
            result = QString::fromUtf8( reinterpret_cast<const char*>(output.bp),
                                        static_cast<int>(output.size) );
    }
    else
        printf( "A severe error (%d) occurred.\n", rc );

    tidyBufFree( &output );
    tidyBufFree( &errbuf );
    tidyRelease( tdoc );

    // NOTE(review): as written this replaces "ö" with itself. It looks like a
    // mojibake repair whose search string was lost in a re-encoding — confirm
    // against the original source.
    result=result.replace("ö","ö");
    return result;
}
void tidy(std::string &input) { TidyBuffer output = {0}; TidyBuffer errbuf = {0}; TidyDoc tdoc = tidyCreate(); tidyOptSetBool(tdoc, TidyXhtmlOut, yes); tidySetErrorBuffer(tdoc, &errbuf); // Capture diagnostics tidyParseString(tdoc, input.c_str()); tidyCleanAndRepair(tdoc); tidySaveBuffer(tdoc, &output); input = std::string((const char*)output.bp); tidyBufFree(&output); tidyBufFree(&errbuf); tidyRelease(tdoc); }
bool HTidyInterface::formatSource( const char* textIn, CString &strTidy, CString &strMsg ) { TidyBuffer output; TidyBuffer errbuf; int rc = -1; Bool ok = yes; TidyDoc tdoc = tidyCreate(); // Initialize "document" tidyBufInit(&output); tidyBufInit(&errbuf); InitTidyDefault(tdoc); SetTidyConfig(tdoc); if ( ok ) rc = tidySetErrorBuffer(tdoc, &errbuf); // Capture diagnostics if ( rc >= 0 ) rc = tidyParseString(tdoc, textIn); // Parse the input if ( rc >= 0 ) rc = tidyCleanAndRepair(tdoc); // Tidy it up! if ( rc >= 0 ) rc = tidyRunDiagnostics(tdoc); // Kvetch //if ( rc > 1 ) // If error, force output. // rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 ); if ( rc >= 0 ) rc = tidySaveBuffer(tdoc, &output); // Pretty Print if ( rc >= 0 ) { strTidy = reinterpret_cast< char const* >(output.bp); } strMsg = reinterpret_cast< char const* >(errbuf.bp); CString strEmpty = _T("No warnings or errors were found.\r\n\r\n"); if (0 == strEmpty.Compare(strMsg)) { strMsg.Empty(); } tidyBufFree(&output); tidyBufFree(&errbuf); tidyRelease(tdoc); return true; }
/*
 * Allocate and initialise a tidy object of the given class.
 *
 * For objtype == is_doc a fresh PHPTidyDoc is attached: a new Tidy document
 * with its own error buffer, reference count 1, TidyForceOutput enabled,
 * TidyMark disabled and the default configuration applied. For is_node
 * nothing beyond the standard object setup is done.
 *
 * Returns the embedded zend_object with `handlers` installed.
 */
static zend_object *tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers, tidy_obj_type objtype)
{
	PHPTidyObj *intern = ecalloc(1, sizeof(PHPTidyObj) + zend_object_properties_size(class_type));

	zend_object_std_init(&intern->std, class_type);
	object_properties_init(&intern->std, class_type);

	if (objtype == is_doc) {
		PHPTidyDoc *ptdoc = emalloc(sizeof(PHPTidyDoc));

		intern->ptdoc = ptdoc;
		ptdoc->doc = tidyCreate();
		ptdoc->ref_count = 1;
		ptdoc->initialized = 0;
		ptdoc->errbuf = emalloc(sizeof(TidyBuffer));
		tidyBufInit(ptdoc->errbuf);

		if (tidySetErrorBuffer(ptdoc->doc, ptdoc->errbuf) != 0) {
			/* Undo every allocation made so far before raising the error. */
			tidyBufFree(ptdoc->errbuf);
			efree(ptdoc->errbuf);
			tidyRelease(ptdoc->doc);
			efree(ptdoc);
			efree(intern);
			php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
		}

		tidyOptSetBool(ptdoc->doc, TidyForceOutput, yes);
		tidyOptSetBool(ptdoc->doc, TidyMark, no);

		TIDY_SET_DEFAULT_CONFIG(ptdoc->doc);

		tidy_add_default_properties(intern, is_doc);
	}

	intern->std.handlers = handlers;

	return &intern->std;
}
int main(int argc, char **argv ) { const char* input = "<title>Hello</title><p>World!"; TidyBuffer output = {0}; TidyBuffer errbuf = {0}; int rc = -1; Bool ok; // Initialize "document" TidyDoc tdoc = tidyCreate(); printf( "Tidying:\t%s\n", input ); // Convert to XHTML ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes ); if ( ok ) rc = tidySetErrorBuffer( tdoc, &errbuf ); // Capture diagnostics if ( rc >= 0 ) rc = tidyParseString( tdoc, input ); // Parse the input if ( rc >= 0 ) rc = tidyCleanAndRepair( tdoc ); // Tidy it up! if ( rc >= 0 ) rc = tidyRunDiagnostics( tdoc ); // Kvetch if ( rc > 1 ) // If error, force output. rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 ); if ( rc >= 0 ) rc = tidySaveBuffer( tdoc, &output ); // Pretty Print if ( rc >= 0 ) { if ( rc > 0 ) printf( "\nDiagnostics:\n\n%s", errbuf.bp ); printf( "\nAnd here is the result:\n\n%s", output.bp ); } else printf( "A severe error (%d) occurred.\n", rc ); tidyBufFree( &output ); tidyBufFree( &errbuf ); tidyRelease( tdoc ); return rc; }
/*
 * Output handler that runs the complete output buffer through Tidy.
 *
 * It only acts when the clean_output setting is enabled and the handler is
 * invoked with both the START and FINAL flags, i.e. the whole output arrives
 * in a single call. On success the repaired markup is handed to
 * output_context->out (marked to be freed by the output layer) and SUCCESS
 * is returned; in every other case FAILURE is returned.
 */
static int php_tidy_output_handler(void **nothing, php_output_context *output_context)
{
	int status = FAILURE;
	TidyDoc doc;
	TidyBuffer inbuf, outbuf, errbuf;

	if (TG(clean_output) && (output_context->op & PHP_OUTPUT_HANDLER_START) && (output_context->op & PHP_OUTPUT_HANDLER_FINAL)) {
		doc = tidyCreate();
		tidyBufInit(&errbuf);

		if (0 == tidySetErrorBuffer(doc, &errbuf)) {
			tidyOptSetBool(doc, TidyForceOutput, yes);
			tidyOptSetBool(doc, TidyMark, no);

			if (ZEND_SIZE_T_UINT_OVFL(output_context->in.used)) {
				/* Fall through to the shared clean-up below instead of
				 * returning here, which leaked doc and errbuf. */
				php_error_docref(NULL, E_WARNING, "Input string is too long");
			} else {
				TIDY_SET_DEFAULT_CONFIG(doc);

				/* inbuf only borrows the output layer's data. */
				tidyBufInit(&inbuf);
				tidyBufAttach(&inbuf, (byte *) output_context->in.data, (uint)output_context->in.used);

				if (0 <= tidyParseBuffer(doc, &inbuf) && 0 <= tidyCleanAndRepair(doc)) {
					tidyBufInit(&outbuf);
					tidySaveBuffer(doc, &outbuf);
					FIX_BUFFER(&outbuf);
					/* Ownership of outbuf's memory passes to the output
					 * context (out.free = 1), so it is not freed here. */
					output_context->out.data = (char *) outbuf.bp;
					output_context->out.used = outbuf.size ? outbuf.size-1 : 0;
					output_context->out.free = 1;
					status = SUCCESS;
				}
			}
		}

		tidyRelease(doc);
		tidyBufFree(&errbuf);
	}

	return status;
}
void parse_urls(const char *filename, const url_list_t *elem) { TidyDoc tdoc; int err; FILE *outfile = NULL; tdoc = tidyCreate(); tidyOptSetBool(tdoc, TidyForceOutput, yes); tidyOptSetBool(tdoc, TidyMark, no); tidyOptSetBool(tdoc, TidyHideEndTags, yes); tidyOptSetBool(tdoc, TidyDropEmptyParas, no); tidyOptSetBool(tdoc, TidyJoinStyles, no); tidyOptSetBool(tdoc, TidyPreserveEntities, yes); tidyOptSetInt(tdoc, TidyMergeDivs, no); tidyOptSetInt(tdoc, TidyMergeSpans, no); tidyOptSetInt(tdoc, TidyWrapLen, 4096); tidyOptSetValue(tdoc, TidyCharEncoding, "utf8"); tidySetReportFilter(tdoc, filter_cb); err = tidyParseFile(tdoc, filename); if (err >= 0) err = tidyCleanAndRepair(tdoc); if (err >= 0) { outfile = option_values.save_relative_links && !option_values.disable_save_tree ? fopen(filename, "w") : NULL; parse_html(tdoc, tidyGetRoot(tdoc), elem, 1, outfile); if (outfile) fclose(outfile); } tidyRelease(tdoc); }
// Converts the HTML in `content` to XHTML with Tidy, then loads the temporary
// XML file <cwd>/data/_tmp.xml with TinyXML and walks it with a
// CTiXmlProxyVistor that fills m_array.
//
// Returns 0 on success and -2 when the XML file cannot be loaded.
//
// NOTE(review): the Tidy output is only written to the temporary file when
// _DEBUG is defined, yet the file is always loaded afterwards. In other
// builds this reads whatever an earlier run left behind, or fails. Confirm
// whether the write was meant to be unconditional.
int CProxyParse::RunFromMem( wxString content )
{
	//http://www.51proxied.com/http_non_anonymous.html
	wxString data_path = wxGetCwd() + "/data/";
	wxString path1 = data_path + "_tmp.xml";
	if (!wxDirExists(data_path))
		wxMkdir(data_path);

	// Tidy is told the input is UTF-8 below, so hand it a real UTF-8 copy.
	// The previous fixed-size copy was sized by character count, so
	// multi-byte text was truncated and could be left without a terminator.
	const wxCharBuffer utf8(content.utf8_str());

	wxLogMessage("Run Tidy!");
	TidyBuffer output;
	TidyBuffer errbuf;
	int rc = -1;
	Bool ok;

	TidyDoc tdoc = tidyCreate();                             // Initialize "document"
	tidyBufInit( &output );
	tidyBufInit( &errbuf );
	tidySetCharEncoding(tdoc, "utf8");
	ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes );          // Convert to XHTML
	if ( ok )
		rc = tidySetErrorBuffer( tdoc, &errbuf );            // Capture diagnostics
	if ( rc >= 0 )
		rc = tidyParseString( tdoc, utf8.data() );           // Parse the input
	if ( rc >= 0 )
		rc = tidyCleanAndRepair( tdoc );                     // Tidy it up!
	if ( rc >= 0 )
		rc = tidyRunDiagnostics( tdoc );                     // Kvetch
	if ( rc > 1 )                                            // If error, force output.
		rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
	if ( rc >= 0 )
		rc = tidySaveBuffer( tdoc, &output );                // Pretty Print

	if ( rc >= 0 )
	{
#ifdef _DEBUG
		WriteAllToFile(path1, (char*)output.bp, output.size);
#endif
	}
	else
		wxLogError("tidyFail");

	tidyBufFree( &output );
	tidyBufFree( &errbuf );
	tidyRelease( tdoc );

	wxLogMessage("Fetch data!");

	// Parse the data
	TiXmlDocument doc(path1);
	if (doc.LoadFile())
	{
		// root
		CTiXmlProxyVistor vistor(&m_array);
		TiXmlElement *pRoot = doc.RootElement();
		// A document without a root element has nothing to visit.
		if (pRoot)
			pRoot->Accept(&vistor);
	}
	else
	{
		wxLogMessage("shit");
		return -2;
	}
	return 0;
}
// Parses the flight table out of an HTML page and appends the flights found
// to listFlight.
//
// listFlight          - receives newly allocated SCaLowPriceFlightDetail
//                       records; entries already in the list may have their
//                       price / remaining-seat fields updated instead.
// strHtmlData         - the HTML text, handed to Tidy with the "raw" encoding.
// straDCode/straACode - default departure / arrival codes, used unless the
//                       table cell supplies airport codes of its own.
// pLowPriceFlightInfo - supplies iProductType and iProductId for new records.
//
// The table is located by FindNode() on class "CA_table mt_10 clear"; its
// first row is skipped as the header and cells 0..5 of every other row are
// decoded by the __Get* helpers.
//
// Always returns -1 (the literal -1.0 converted to int).
// NOTE(review): the result of tidyParseString() is not checked.
int CCaHtmlParse::ParseCaHtmlFlights(std::list<SCaLowPriceFlightDetail*> & listFlight, const std::string& strHtmlData, const CStringA & straDCode, const CStringA & straACode, const SCaLowPriceFlightInfo* pLowPriceFlightInfo)
{
	TidyDoc doc = tidyCreate();
	tidySetCharEncoding(doc,"raw");
	tidyParseString(doc,strHtmlData.c_str());
	TidyNode tnRoot = tidyGetRoot(doc);
	TidyNode tFlightTab;
	TidyNode tdChild;
	int nIndexTd = 0;
	CTime tCurrent = CTime::GetCurrentTime();
	SCaLowPriceFlightDetail *pfindFlight = NULL;

	if (FindNode(tnRoot,"class","CA_table mt_10 clear",tFlightTab))
	{
		// Loop over the settlement prices; every child node under tblPolicy
		// is one settlement-price record.
		TidyNode trFlight;
		int nIndexTr = 0;
		BOOL bValid = FALSE;
		CStringA straDPortCode = straDCode;
		CStringA straAPortCode = straACode;
		CStringA straFlightNo("");
		CStringA straFlightStartDate("");
		CStringA straSaleEndDate("");
		CStringA straSaleEndTime("");
		CStringA straFlightStartTime("");
		UINT uPrice = 0;
		UINT uRemainTicket = 0;

		for ( trFlight = tidyGetChild(tFlightTab); trFlight; trFlight = tidyGetNext(trFlight) )
		{
			if (0 == nIndexTr) // skip the table header
			{
				nIndexTr++;
				continue;
			}

			// Reset the per-row state. The airport codes are deliberately not
			// reset here; they keep their previous values.
			nIndexTd = 0;
			bValid = FALSE;
			straFlightNo = "";
			straFlightStartDate = "";
			straSaleEndDate = "";
			straSaleEndTime = "";
			straFlightStartTime = "";
			uPrice = 0;
			uRemainTicket = 0;

			for ( tdChild = tidyGetChild(trFlight); tdChild; tdChild = tidyGetNext(tdChild) )
			{
				switch(nIndexTd)
				{
				case 0:
					{
						// selection column: whether the flight is disabled
						bValid = __IsFlightValid(tdChild);
						TRACE(_T("Flight valid:%d-"), bValid);
					}
					break;
				case 1:
					{
						// date / flight number
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						__GetFlightNoAndFlightStartDate(straFlightNo, straFlightStartDate, doc, tdChild);
						TRACE("date:%s, no:%s-", straFlightStartDate, straFlightNo);
						//TRACE("%s\r\n", GetNodeContent(doc, tdChild));
					}
					break;
				case 2:
					{
						// departure / arrival time
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						//TRACE("%s\r\n", GetNodeContent(doc, tdChild));
						__GetFlightStartTime(straFlightStartTime, doc, tdChild);
					}
					break;
				case 3:
					{
						// airport
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						//TRACE("%s\r\n", GetNodeContent(doc, tdChild));
						if (__IsTwoAirPort(straDCode, straACode))
						{
							__GetAirPortCode(straDPortCode, straAPortCode, doc, tdChild);
							// fall back to the caller's codes when the cell gave none
							if(straDPortCode.IsEmpty())
								straDPortCode = straDCode;
							if(straAPortCode.IsEmpty())
								straAPortCode = straACode;
							TRACE("%s->%s-", straDPortCode, straAPortCode);
						}
					}
					break;
				case 4:
					{
						// sale end date and time
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						//TRACE("%s\r\n", GetNodeContent(doc, tdChild));
						__GetSaleEndDate(straSaleEndDate, straSaleEndTime, doc, tdChild);
						TRACE("sale end date:%s, %s-", straSaleEndDate, straSaleEndTime);
					}
					break;
				case 5:
					{
						// group-buy price
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						//TRACE("%s\r\n", GetNodeContent(doc, tdChild));
						//CStringA straSetPrice = GetNodeContent(doc, tdChild);
						//double fSetPrice = atof(straSetPrice.GetBuffer(0));
						//straSetPrice.ReleaseBuffer();
						//tidyRelease(doc);
						//return fSetPrice;
						__GetPriceAndRamainTicket(&uPrice, &uRemainTicket, doc, tdChild);
						TRACE("price:%d, remain %d seats", uPrice, uRemainTicket);
					}
					break;
				}
				nIndexTd++;
			}
			TRACE(_T("\r\n"));

			// Flights after the cut-off date are not fetched.
			// Get the departure date (2014-12-12 is the default the helper
			// starts from).
			int nFlightStartYear = 2014;
			int nFlightStartMonth = 12;
			int nFlightStartDay = 12;
			GetYearMonthDay(straFlightStartDate, &nFlightStartYear, &nFlightStartMonth, &nFlightStartDay);
			// NOTE(review): tStart is only referenced by the disabled code below.
			CTime tStart(nFlightStartYear, nFlightStartMonth, nFlightStartDay, 0, 0, 0);
			//if (!m_bGetAllCaTuanFlight)
			//{
			//	if (tStart > m_tGetEndTime)
			//		continue;
			//}
			//
			//double d6 = pLowPriceFlightInfo->iMinHangPrice * 0.6;
			//UINT u6 = (UINT)d6;
			//// Ordinary group-buy fares above 60% charge for refund/change
			//// (low-price applications are not restricted), so they are not listed.
			//if (uPrice > d6 && CA_TUAN_PRODUCT == pLowPriceFlightInfo->iProductType)
			//{
			//	bValid = FALSE;
			//	uRemainTicket = 0;
			//	continue;
			//}

			// For flights with the same date, time and flight number keep
			// only the lowest price.
			BOOL bFind = __findCaFlight(&pfindFlight, straFlightStartDate, straDPortCode, straAPortCode, straFlightNo, listFlight);
			if (bFind)
			{
				int nCurPrice = (int)uPrice;
				// the flight just parsed is cheaper than the one parsed earlier
				if(pfindFlight->nPrice > nCurPrice)
				{
					if (uRemainTicket > m_nMinTicketWarnNum)
					{
						// Enough tickets remain for the current entry, so use
						// its count and price to update the earlier record.
						pfindFlight->nRemainSeat = uRemainTicket;
						pfindFlight->nPrice = nCurPrice;
						pfindFlight = NULL;
					}
				}
				else //(pfindFlight->nPrice <= nCurPrice)
				{
					// The earlier record is at or below the warning threshold
					// for remaining seats: replace it with the current entry.
					if(pfindFlight->nRemainSeat <= m_nMinTicketWarnNum)
					{
						pfindFlight->nRemainSeat = uRemainTicket;
						pfindFlight->nPrice = nCurPrice;
						pfindFlight = NULL;
					}
				}
				continue;
			}

			// Save the parsed flight information; the caller is responsible
			// for freeing the memory.
			if (bValid)
			{
				SCaLowPriceFlightDetail* pDetail = new SCaLowPriceFlightDetail;
				pDetail->straCompany = "CA";
				pDetail->straFromCityCode = straDPortCode;
				pDetail->straToCityCode = straAPortCode;
				pDetail->straFlightNo = straFlightNo;
				pDetail->straFromDate = straFlightStartDate;

				// Ctrip orders take some time to arrive and Air China closes
				// at 16:00, so for same-day tickets and tickets before 12:00
				// the next day the sale end time is moved 30 minutes earlier.
				// Compute the sale interval.
				int nSaleEndYear = 2014;
				int nSaleEndMonth = 12;
				int nSaleEndDay = 12;
				GetYearMonthDay(straSaleEndDate, &nSaleEndYear, &nSaleEndMonth, &nSaleEndDay);
				int nSaleEndHour = 12;
				int nSaleEndMin = 0;
				GetHourMinSec(straSaleEndTime, &nSaleEndHour, &nSaleEndMin);
				CTime tSaleEndDate(nSaleEndYear, nSaleEndMonth, nSaleEndDay, nSaleEndHour, nSaleEndMin, 0);
				CTimeSpan tSpan = tSaleEndDate - tCurrent;
				// end: compute the sale interval

				// Get the departure time.
				int nFlightStartHour = 12;
				int nFlightStartMin = 0;
				GetHourMinSec(straFlightStartTime, &nFlightStartHour, &nFlightStartMin);
				CTime tFlightStartTime(nFlightStartYear, nFlightStartMonth, nFlightStartDay, nFlightStartHour, nFlightStartMin, 0);
				CTime tTimeKey(nFlightStartYear, nFlightStartMonth, nFlightStartDay, 12, 0, 0);
				// end: get the departure time

				// For today's and tomorrow's flights that depart before 12:00
				// and are low-price applications, the sale ends 30 minutes
				// before the official site's sale end on the previous day.
				if ((CA_TUAN_LOW_PRICE_APPLY_PRODUT == pLowPriceFlightInfo->iProductType) && (1 == tSpan.GetDays())) // tomorrow's low-price application
				{
					if(tFlightStartTime <= tTimeKey) // departs by 12:00 tomorrow: valid until 15:25 today (Air China closes at 16:00)
					{
						pDetail->straSaleEndDate.Format("%d-%02d-%02d", tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay());
						CTime tSaleEnd(tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay(), 15, 25, 0);
						pDetail->straSaleEndTime.Format("%02d:%02d:%02d", tSaleEnd.GetHour(), tSaleEnd.GetMinute(), 0);
					}
					else // departs after 12:00 tomorrow: tickets can be issued tomorrow morning
					{
						pDetail->straSaleEndDate = straSaleEndDate;
						pDetail->straSaleEndTime.Format("%02d:%02d:%02d", nSaleEndHour, nSaleEndMin, 0);
					}
				}
				else if ((CA_TUAN_LOW_PRICE_APPLY_PRODUT == pLowPriceFlightInfo->iProductType) && (tSpan.GetDays() < 1)) // today's low-price application: valid until 15:30 today (Air China closes at 16:00)
				{
					pDetail->straSaleEndDate.Format("%d-%02d-%02d", tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay());
					CTime tSaleEnd(tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay(), 15, 30, 0);
					pDetail->straSaleEndTime.Format("%02d:%02d:%02d", tSaleEnd.GetHour(), tSaleEnd.GetMinute(), 0);
				}
				else // ordinary group-buy, and low-price applications for the day after tomorrow or later
				{
					pDetail->straSaleEndDate = straSaleEndDate;
					pDetail->straSaleEndTime.Format("%02d:%02d:%02d", nSaleEndHour, nSaleEndMin, 0);
				}

				// Once the policy's sale end time has passed, the policy is
				// removed by reporting zero remaining tickets.
				GetYearMonthDay(pDetail->straSaleEndDate, &nSaleEndYear, &nSaleEndMonth, &nSaleEndDay);
				int nSaleEndSec = 0;
				GetHourMinSec(pDetail->straSaleEndTime, &nSaleEndHour, &nSaleEndMin, &nSaleEndSec);
				CTime tPolicyDeleteTime(nSaleEndYear, nSaleEndMonth, nSaleEndDay, nSaleEndHour, nSaleEndMin, nSaleEndSec);
				if (tCurrent >= tPolicyDeleteTime)
					uRemainTicket = 0;

				pDetail->nPrice = uPrice;
				pDetail->nProductId = pLowPriceFlightInfo->iProductId;
				pDetail->nRemainSeat = uRemainTicket;
				pDetail->nProductType = pLowPriceFlightInfo->iProductType;
				listFlight.push_back(pDetail);
			}
		}
	}

	tidyRelease(doc);
	return -1.0;
}
void FetchTaskHandler(const FetchTaskMessage &message, const Theron::Address from) { std::cout<<"get data.................."<<std::endl; std::string url=message.fi->url; char curl_errbuf[CURL_ERROR_SIZE]; CURL *curl = curl_easy_init(); int err; fetch::FetchResult *result=new fetch::FetchResult(); fetch::FetchInfo fi=*(message.fi); delete message.fi; result->type=fetch::UNKNOWN; result->url=fi.url; result->pathList=fi.pathList; result->attMap=fi.attMap; int errCode=0; if(curl!=NULL) { curl_easy_setopt(curl, CURLOPT_URL,url.c_str()); curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf); // curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); // curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10); curl_easy_setopt(curl, CURLOPT_NOSIGNAL,1); curl_easy_setopt(curl, CURLOPT_TIMEOUT ,60); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fetch_write); std::stringstream iss; curl_easy_setopt(curl, CURLOPT_WRITEDATA, &iss); err=curl_easy_perform(curl); if ( !err ) { std::map<std::string,std::string>::iterator efit=fi.attMap.find("encode"); char *resStr=new char[iss.str().length()*3]; memset(resStr,0,iss.str().length()*3); if(efit!=fi.attMap.end()) { UErrorCode error = U_ZERO_ERROR; ucnv_convert("UTF-8",efit->second.c_str(),resStr, iss.str().length()*3, iss.str().c_str(), iss.str().length(), &error ); }else { strcpy(resStr,iss.str().c_str()); } TidyDoc tdoc; // TidyBuffer tidy_errbuf = {0}; // TidyBuffer docbuf = {0}; tdoc = tidyCreate(); tidyOptSetInt(tdoc, TidyWrapLen, 4096); // tidySetErrorBuffer( tdoc, &tidy_errbuf ); tidyOptSetBool( tdoc, TidyXmlOut, yes ); tidyOptSetBool(tdoc, TidyQuoteNbsp, no); tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */ // tidyOptSetBool( tdoc, TidyXmlDecl, yes ); tidyOptSetBool(tdoc, TidyQuiet, yes); tidyOptSetBool(tdoc, TidyShowWarnings, no); tidyOptSetValue(tdoc,TidyDoctype,"omit"); tidyOptSetBool(tdoc, TidyFixBackslash, yes); tidyOptSetBool(tdoc, TidyMark, no); 
tidySetCharEncoding(tdoc,"utf8"); // tidyBufInit(&docbuf); // err = tidyParseBuffer(tdoc, &docbuf); err = tidyParseString(tdoc, resStr); if ( err >= 0 ) { err = tidyCleanAndRepair(tdoc); /* fix any problems */ if ( err >= 0 ) { // err=tidyRunDiagnostics(tdoc); /* load tidy error buffer */ // if ( err >= 0 ) { // std::cout<<"tidy error:"<<tidy_errbuf.bp<<std::endl; /* show errors */ TidyBuffer outbuf = {0}; tidyBufInit(&outbuf); tidySaveBuffer( tdoc, &outbuf ); std::stringstream hss; hss<<(char*)outbuf.bp; tidyBufFree(&outbuf); result->result=hss.str(); // } // else // { // errCode=-5; // } }else { errCode=-4; } }else { errCode=-3; } // tidyBufFree(&tidy_errbuf); // tidyBufFree(&docbuf); tidyRelease(tdoc); delete [] resStr; }else { errCode=-2; } }else { errCode=-1; } if(errCode<0) { std::stringstream ess; ess<<errCode; result->type=fetch::ERROR; result->result=ess.str(); } curl_easy_cleanup(curl); Send(FetchResultMessage(result), from); }
/*
 * Shared implementation of the "repair" functions: parses the input with
 * Tidy, repairs it and returns the cleaned markup as a string, or FALSE on
 * failure.
 *
 * is_file - when non-zero the first argument is a path whose contents are
 *           read into memory first (optionally via the include path);
 *           otherwise the first argument is the markup itself.
 *
 * Optional PHP arguments: config (applied via TIDY_APPLY_CONFIG_ZVAL),
 * encoding name and use_include_path.
 */
static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, zend_bool is_file)
{
	char *enc = NULL;
	size_t enc_len = 0;
	zend_bool use_include_path = 0;
	TidyDoc doc;
	TidyBuffer *errbuf;
	zend_string *data, *arg1;
	zval *config = NULL;

	if (is_file) {
		if (zend_parse_parameters(ZEND_NUM_ARGS(), "P|zsb", &arg1, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
			RETURN_FALSE;
		}
		if (!(data = php_tidy_file_to_mem(ZSTR_VAL(arg1), use_include_path))) {
			RETURN_FALSE;
		}
	} else {
		if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|zsb", &arg1, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
			RETURN_FALSE;
		}
		data = arg1;
	}

	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(data))) {
		php_error_docref(NULL, E_WARNING, "Input string is too long");
		/* The file contents were read into a string owned by this function;
		 * release it before bailing out, as the normal exit path does. */
		if (is_file) {
			zend_string_release(data);
		}
		RETURN_FALSE;
	}

	doc = tidyCreate();
	errbuf = emalloc(sizeof(TidyBuffer));
	tidyBufInit(errbuf);

	if (tidySetErrorBuffer(doc, errbuf) != 0) {
		tidyBufFree(errbuf);
		efree(errbuf);
		tidyRelease(doc);
		php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
	}

	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);

	TIDY_SET_DEFAULT_CONFIG(doc);

	if (config) {
		TIDY_APPLY_CONFIG_ZVAL(doc, config);
	}

	if(enc_len) {
		if (tidySetCharEncoding(doc, enc) < 0) {
			php_error_docref(NULL, E_WARNING, "Could not set encoding '%s'", enc);
			RETVAL_FALSE;
		}
	}

	if (data) {
		TidyBuffer buf;

		/* buf only borrows the bytes of `data`; it is never freed. */
		tidyBufInit(&buf);
		tidyBufAttach(&buf, (byte *) ZSTR_VAL(data), (uint)ZSTR_LEN(data));

		if (tidyParseBuffer(doc, &buf) < 0) {
			php_error_docref(NULL, E_WARNING, "%s", errbuf->bp);
			RETVAL_FALSE;
		} else {
			if (tidyCleanAndRepair(doc) >= 0) {
				TidyBuffer output;
				tidyBufInit(&output);

				tidySaveBuffer (doc, &output);
				FIX_BUFFER(&output);
				RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
				tidyBufFree(&output);
			} else {
				RETVAL_FALSE;
			}
		}
	}

	if (is_file) {
		zend_string_release(data);
	}

	tidyBufFree(errbuf);
	efree(errbuf);
	tidyRelease(doc);
}
/*
 * Command-line driver for Tidy.
 *
 * Start-up: creates a Tidy document, then loads configuration from
 * TIDY_CONFIG_FILE (when that macro is defined and the file exists), and
 * from either the file named by the HTML_TIDY environment variable or, when
 * that variable is unset and the macro is defined, TIDY_USER_CONFIG_FILE.
 *
 * Command line: arguments starting with '-' are options (both -foo and
 * --foo are accepted); every other argument is an input file that is parsed,
 * repaired, diagnosed and written out. With no file argument the document is
 * read from stdin.
 *
 * Returns 2 if any content errors were counted, 1 if there were only
 * warnings, and 0 otherwise. The help, version and configuration-listing
 * options return 0 immediately.
 */
int main( int argc, char** argv )
{
    ctmbstr prog = argv[0];
    ctmbstr cfgfil = NULL, errfil = NULL, htmlfil = NULL;
    TidyDoc tdoc = tidyCreate();
    int status = 0;

    /* Totals accumulated over all input files. */
    uint contentErrors = 0;
    uint contentWarnings = 0;
    uint accessWarnings = 0;

    errout = stderr;  /* initialize to stderr */
    status = 0;

#ifdef TIDY_CONFIG_FILE
    if ( tidyFileExists( tdoc, TIDY_CONFIG_FILE) )
    {
        status = tidyLoadConfig( tdoc, TIDY_CONFIG_FILE );
        if ( status != 0 )
            fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", TIDY_CONFIG_FILE, status);
    }
#endif /* TIDY_CONFIG_FILE */

    /* look for env var "HTML_TIDY" */
    /* then for ~/.tidyrc (on platforms defining $HOME) */

    if ( (cfgfil = getenv("HTML_TIDY")) != NULL )
    {
        status = tidyLoadConfig( tdoc, cfgfil );
        if ( status != 0 )
            fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", cfgfil, status);
    }
#ifdef TIDY_USER_CONFIG_FILE
    else if ( tidyFileExists( tdoc, TIDY_USER_CONFIG_FILE) )
    {
        status = tidyLoadConfig( tdoc, TIDY_USER_CONFIG_FILE );
        if ( status != 0 )
            fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", TIDY_USER_CONFIG_FILE, status);
    }
#endif /* TIDY_USER_CONFIG_FILE */

    /* read command line */
    /* argv[1] is always the argument under inspection; consuming an argument
       means --argc / ++argv, so argv[0] is whatever was handled last. */
    while ( argc > 0 )
    {
        if (argc > 1 && argv[1][0] == '-')
        {
            /* support -foo and --foo */
            /* arg skips only the first dash, which is why the long spellings
               below are compared as "-output-file", "-file", "-wrap", ... */
            ctmbstr arg = argv[1] + 1;

            if ( strcasecmp(arg, "xml") == 0)
                tidyOptSetBool( tdoc, TidyXmlTags, yes );

            else if ( strcasecmp(arg, "asxml") == 0 ||
                      strcasecmp(arg, "asxhtml") == 0 )
            {
                tidyOptSetBool( tdoc, TidyXhtmlOut, yes );
            }
            else if ( strcasecmp(arg, "ashtml") == 0 )
                tidyOptSetBool( tdoc, TidyHtmlOut, yes );

            else if ( strcasecmp(arg, "indent") == 0 )
            {
                tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
                /* an indent width of 0 is reset to the option's default */
                if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
                    tidyOptResetToDefault( tdoc, TidyIndentSpaces );
            }
            else if ( strcasecmp(arg, "omit") == 0 )
                tidyOptSetBool( tdoc, TidyHideEndTags, yes );

            else if ( strcasecmp(arg, "upper") == 0 )
                tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );

            else if ( strcasecmp(arg, "clean") == 0 )
                tidyOptSetBool( tdoc, TidyMakeClean, yes );

            else if ( strcasecmp(arg, "bare") == 0 )
                tidyOptSetBool( tdoc, TidyMakeBare, yes );

            /* an option that is itself an encoding name selects that encoding */
            else if ( strcasecmp(arg, "raw") == 0 ||
                      strcasecmp(arg, "ascii") == 0 ||
                      strcasecmp(arg, "latin0") == 0 ||
                      strcasecmp(arg, "latin1") == 0 ||
                      strcasecmp(arg, "utf8") == 0 ||
#ifndef NO_NATIVE_ISO2022_SUPPORT
                      strcasecmp(arg, "iso2022") == 0 ||
#endif
#if SUPPORT_UTF16_ENCODINGS
                      strcasecmp(arg, "utf16le") == 0 ||
                      strcasecmp(arg, "utf16be") == 0 ||
                      strcasecmp(arg, "utf16") == 0 ||
#endif
#if SUPPORT_ASIAN_ENCODINGS
                      strcasecmp(arg, "shiftjis") == 0 ||
                      strcasecmp(arg, "big5") == 0 ||
#endif
                      strcasecmp(arg, "mac") == 0 ||
                      strcasecmp(arg, "win1252") == 0 ||
                      strcasecmp(arg, "ibm858") == 0 )
            {
                tidySetCharEncoding( tdoc, arg );
            }
            else if ( strcasecmp(arg, "numeric") == 0 )
                tidyOptSetBool( tdoc, TidyNumEntities, yes );

            else if ( strcasecmp(arg, "modify") == 0 ||
                      strcasecmp(arg, "change") == 0 || /* obsolete */
                      strcasecmp(arg, "update") == 0 ) /* obsolete */
            {
                tidyOptSetBool( tdoc, TidyWriteBack, yes );
            }
            else if ( strcasecmp(arg, "errors") == 0 )
                tidyOptSetBool( tdoc, TidyShowMarkup, no );

            else if ( strcasecmp(arg, "quiet") == 0 )
                tidyOptSetBool( tdoc, TidyQuiet, yes );

            /* The informational options below print and exit at once. */
            else if ( strcasecmp(arg, "help") == 0 ||
                      strcasecmp(arg, "h") == 0 || *arg == '?' )
            {
                help( prog );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "xml-help") == 0)
            {
                xml_help( );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "help-config") == 0 )
            {
                optionhelp( tdoc );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "xml-config") == 0 )
            {
                XMLoptionhelp( tdoc );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "show-config") == 0 )
            {
                optionvalues( tdoc );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "config") == 0 )
            {
                if ( argc >= 3 )
                {
                    ctmbstr post;

                    tidyLoadConfig( tdoc, argv[2] );

                    /* Set new error output stream if setting changed */
                    post = tidyOptGetValue( tdoc, TidyErrFile );
                    if ( post && (!errfil || !samefile(errfil, post)) )
                    {
                        errfil = post;
                        errout = tidySetErrorFile( tdoc, post );
                    }

                    /* consume the file name as well */
                    --argc;
                    ++argv;
                }
            }

#if SUPPORT_ASIAN_ENCODINGS
            else if ( strcasecmp(arg, "language") == 0 ||
                      strcasecmp(arg, "lang") == 0 )
            {
                if ( argc >= 3 )
                {
                    tidyOptSetValue( tdoc, TidyLanguage, argv[2] );
                    --argc;
                    ++argv;
                }
            }
#endif

            else if ( strcasecmp(arg, "output") == 0 ||
                      strcasecmp(arg, "-output-file") == 0 ||
                      strcasecmp(arg, "o") == 0 )
            {
                if ( argc >= 3 )
                {
                    tidyOptSetValue( tdoc, TidyOutFile, argv[2] );
                    --argc;
                    ++argv;
                }
            }
            else if ( strcasecmp(arg, "file") == 0 ||
                      strcasecmp(arg, "-file") == 0 ||
                      strcasecmp(arg, "f") == 0 )
            {
                if ( argc >= 3 )
                {
                    errfil = argv[2];
                    errout = tidySetErrorFile( tdoc, errfil );
                    --argc;
                    ++argv;
                }
            }
            else if ( strcasecmp(arg, "wrap") == 0 ||
                      strcasecmp(arg, "-wrap") == 0 ||
                      strcasecmp(arg, "w") == 0 )
            {
                if ( argc >= 3 )
                {
                    uint wraplen = 0;
                    int nfields = sscanf( argv[2], "%u", &wraplen );
                    tidyOptSetInt( tdoc, TidyWrapLen, wraplen );
                    /* Only consume the next argument if it really was a
                       number; otherwise the wrap length is set to 0 and the
                       argument is processed on the next iteration. */
                    if (nfields > 0)
                    {
                        --argc;
                        ++argv;
                    }
                }
            }
            else if ( strcasecmp(arg, "version") == 0 ||
                      strcasecmp(arg, "-version") == 0 ||
                      strcasecmp(arg, "v") == 0 )
            {
                version();
                tidyRelease( tdoc );
                return 0; /* success */
            }
            /* Any other --name is treated as a configuration option whose
               value is the following argument.
               NOTE(review): argv[2] is passed on without an argc >= 3 check;
               confirm tidyOptParseValue() copes with a NULL value. */
            else if ( strncmp(argv[1], "--", 2 ) == 0)
            {
                if ( tidyOptParseValue(tdoc, argv[1]+2, argv[2]) )
                {
                    /* Set new error output stream if setting changed */
                    ctmbstr post = tidyOptGetValue( tdoc, TidyErrFile );
                    if ( post && (!errfil || !samefile(errfil, post)) )
                    {
                        errfil = post;
                        errout = tidySetErrorFile( tdoc, post );
                    }

                    ++argv;
                    --argc;
                }
            }

#if SUPPORT_ACCESSIBILITY_CHECKS
            else if ( strcasecmp(arg, "access") == 0 )
            {
                if ( argc >= 3 )
                {
                    uint acclvl = 0;
                    int nfields = sscanf( argv[2], "%u", &acclvl );
                    tidyOptSetInt( tdoc, TidyAccessibilityCheckLevel, acclvl );
                    if (nfields > 0)
                    {
                        --argc;
                        ++argv;
                    }
                }
            }
#endif

            /* Anything else is a cluster of single-letter flags, e.g. -icq. */
            else
            {
                uint c;
                ctmbstr s = argv[1];

                /* ++s skips the leading dash before the first letter is read */
                while ( (c = *++s) != '\0' )
                {
                    switch ( c )
                    {
                    case 'i':
                        tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
                        if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
                            tidyOptResetToDefault( tdoc, TidyIndentSpaces );
                        break;

                    /* Usurp -o for output file. Anyone hiding end tags?
                    case 'o':
                        tidyOptSetBool( tdoc, TidyHideEndTags, yes );
                        break;
                    */

                    case 'u':
                        tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
                        break;

                    case 'c':
                        tidyOptSetBool( tdoc, TidyMakeClean, yes );
                        break;

                    case 'b':
                        tidyOptSetBool( tdoc, TidyMakeBare, yes );
                        break;

                    case 'n':
                        tidyOptSetBool( tdoc, TidyNumEntities, yes );
                        break;

                    case 'm':
                        tidyOptSetBool( tdoc, TidyWriteBack, yes );
                        break;

                    case 'e':
                        tidyOptSetBool( tdoc, TidyShowMarkup, no );
                        break;

                    case 'q':
                        tidyOptSetBool( tdoc, TidyQuiet, yes );
                        break;

                    default:
                        unknownOption( c );
                        break;
                    }
                }
            }

            /* consume the option itself and look at the next argument */
            --argc;
            ++argv;
            continue;
        }

        /* Not an option: the argument (if any) is an input file. */
        if ( argc > 1 )
        {
            htmlfil = argv[1];
            if ( tidyOptGetBool(tdoc, TidyEmacs) )
                tidyOptSetValue( tdoc, TidyEmacsFile, htmlfil );
            status = tidyParseFile( tdoc, htmlfil );
        }
        else
        {
            htmlfil = "stdin";
            status = tidyParseStdin( tdoc );
        }

        if ( status >= 0 )
            status = tidyCleanAndRepair( tdoc );

        if ( status >= 0 )
            status = tidyRunDiagnostics( tdoc );

        if ( status > 1 ) /* If errors, do we want to force output? */
            status = ( tidyOptGetBool(tdoc, TidyForceOutput) ? status : -1 );

        /* Output goes back to the input file (write-back), to the configured
           output file, or to stdout, in that order of preference. */
        if ( status >= 0 && tidyOptGetBool(tdoc, TidyShowMarkup) )
        {
            if ( tidyOptGetBool(tdoc, TidyWriteBack) && argc > 1 )
                status = tidySaveFile( tdoc, htmlfil );
            else
            {
                ctmbstr outfil = tidyOptGetValue( tdoc, TidyOutFile );
                if ( outfil )
                    status = tidySaveFile( tdoc, outfil );
                else
                    status = tidySaveStdout( tdoc );
            }
        }

        contentErrors += tidyErrorCount( tdoc );
        contentWarnings += tidyWarningCount( tdoc );
        accessWarnings += tidyAccessWarningCount( tdoc );

        --argc;
        ++argv;

        /* stop once no arguments remain after the one just handled */
        if ( argc <= 1 )
            break;
    }

    if (!tidyOptGetBool(tdoc, TidyQuiet) &&
        errout == stderr && !contentErrors)
        fprintf(errout, "\n");

    if (contentErrors + contentWarnings > 0 &&
        !tidyOptGetBool(tdoc, TidyQuiet))
        tidyErrorSummary(tdoc);

    if (!tidyOptGetBool(tdoc, TidyQuiet))
        tidyGeneralInfo(tdoc);

    /* called to free hash tables etc. */
    tidyRelease( tdoc );

    /* return status can be used by scripts */
    if ( contentErrors > 0 )
        return 2;

    if ( contentWarnings > 0 )
        return 1;

    /* 0 signifies all is ok */
    return 0;
}
int main(int argc, char *argv[]) { errno = 0; //CL args const char *dbfile = NULL, *stn_departure = NULL, *stn_arrival = NULL; int ch,res, consecutive_success = 0; //Handles CURL *curl_hdl = NULL; sqlite3 *db_hdl = NULL; TidyDoc tdoc = NULL; //Query initialisers char *link, *new_link; struct tm tm_dep; time_t last_time_dep = 0; int requery = 0; //Parse result holders struct train_list_t *trains = NULL; size_t n, ntrains; //Output char str_time_dep[20]; size_t total = 0; //Parse cmdline while( (ch = getopt(argc, argv, "d:f:t:")) != -1 ) { debug("ch = %d", ch); switch(ch) { case 'd': debug("d %s", optarg); dbfile = optarg; break; case 'f': debug("f %s", optarg); stn_departure = optarg; break; case 't': debug("t %s", optarg); stn_arrival = optarg; break; case '?': if(optopt=='d' || optopt=='f' || optopt=='t') { log_info("Missing argument for option -%c", optopt); goto usage; } else if(isprint(optopt)) { log_info("Unknown option '-%c'", optopt); } else { log_info("Unknown option character '\\x%x'", optopt); } break; default: debug("err got c=%d (opterr=%d, optopt=%c, optind=%d, optarg=%s)", ch, opterr, optopt, optind, optarg); goto usage; } } if(!dbfile || !stn_departure || !stn_arrival) goto usage; debug("Starting %s with dbfile='%s', stn_dep='%s', stn_arr='%s'", argv[0], dbfile, stn_departure, stn_arrival); //Set up Curl check(curl_tidy_init(&curl_hdl)==0,"Failed to initialise curl"); debug("curl_hdl %p", curl_hdl); //Set up database and get names res = database_init(&db_hdl, dbfile); check(res==0, "Failed to open database"); //Send search query time_t now = time(NULL); localtime_r(&now, &tm_dep); tm_dep.tm_hour++; res = sncf_post_form(curl_hdl, &tdoc, &link, &tm_dep, stn_departure, stn_arrival); check(res==0, "Failed to perform query"); debug("Initialized (%d) - link = %s", res, link); //Fetch, parse, print while(1) { debug("Next link %s", link); tidyRelease(tdoc); res = curl_tidy_get(curl_hdl, link, &tdoc); check(res == 0, "failed to fetch results page"); res = 
sncf_find_next_results_link(tdoc, &new_link); check(res == 0, "failed to get link to next results"); /* * An error in the SNCF site results in occasionally being * sent to the same results page. This means getting stuck * in a loop. If that happens, a workaround is to start a * new query and continue from there */ if(!strcmp(link, new_link)) { log_info("Next results page is the same as the current one (%d successes)", consecutive_success); if(consecutive_success <= 2) { log_info("less than 3 success before loop, this is the end"); break; } requery = 1; } if(requery) { if(requery == 1) log_info("requerying cos of link loop"); if(requery == 2) log_info("requerying cos of time travel"); requery = 0; localtime_r(&last_time_dep, &tm_dep); consecutive_success = 0; tidyRelease(tdoc); free(link); free(new_link); //FIXME: change tm_dep so the SNCF site is likely to handle it res = sncf_post_form(curl_hdl, &tdoc, &link, &tm_dep, stn_departure, stn_arrival); check(res==0, "Failed to perform query"); continue; } if(trains) { debug("last time dep = %lu - train time dep = %lu", last_time_dep, get_last_train(trains)->train.time_departure); } free_trains(trains); trains = NULL; ntrains = sncf_parse_results(db_hdl, tdoc, &trains); debug("found %lu trains", ntrains); //Check if we're getting the same results over and over again (only iff we have results (ntrains) // and only if last_time_dep was set before (check if it's not 0 as initialized)) if(last_time_dep && ntrains && get_last_train(trains)->train.time_departure < last_time_dep) { requery = 2; continue; } if(last_time_dep && ntrains && get_last_train(trains)->train.time_departure == last_time_dep) { log_info("Got the exact same results twice, finishing up"); break; } if(ntrains) { last_time_dep = get_last_train(trains)->train.time_departure; } else { log_info("No trains found, this is the end"); break; } n = train_store(db_hdl, trains); if(n!=ntrains) { log_info("only stored %lu out of %lu trains, aborting", n, ntrains); goto 
error; } debug("Stored all %lu trains", n); total+=n; #ifdef NDEBUG localtime_r(&get_last_train(trains)->train.time_departure, &tm_dep); strftime(str_time_dep, 20, "%e-%b-%Y %R", &tm_dep); printf("Processed %6lu trains - Last one departed at %s\r", total, str_time_dep); fflush(stdout); #else print_trains(db_hdl, trains, 0); #endif consecutive_success++; free(link); link = new_link; } free(link); free(new_link); error: if(tdoc) { tidySaveFile(tdoc, "dumpfile-exit.html"); tidyRelease(tdoc); } curl_tidy_cleanup(curl_hdl); database_cleanup(db_hdl); localtime_r(&get_last_train(trains)->train.time_departure, &tm_dep); strftime(str_time_dep, 20, "%e-%b-%Y %R", &tm_dep); log_info("Exiting after storing %lu trains (last one arriving %s)", total, str_time_dep); return 0; usage: printf( "Usage : %s -d <dbfile> -f <stn_dep> -t <stn_arr>\n" "\n" "\t<dbfile>\tThe sqlite3 database filename\n" "\t<stn_dep>\tThe departure station\n" "\t<stn_arr>\tThe arrival station\n" "\n", argv[0]); return 0; }
/*! * \fn static int TidyHtml(const char *pcSourcePage, string &sDestPage); * \brief 修补丢失、错误标签 * \param [in]待修补网页字符串 * \param [out]修补后的网页string * \return 结果码,==0修补正确,<0修补失败 * \date 2011-06-01 * \author nanjunxiao */ int Pretreat::TidyHtml(const char *pcSourcePage, std::string &sDestPage) { int iReturn = 0; TidyBuffer errbuf = {0}; TidyDoc tdoc; tmbstr pBuffer = NULL; try { if ( (pcSourcePage == NULL) || (strlen(pcSourcePage) ==0 ) ) { //cerr << "TidyHtml 输入页面为空!" << endl; throw (-1); } int iRet = -1; Bool bOk; uint uiBufLen; int iBufSize; tdoc = tidyCreate();// Initialize "document" bOk = tidyOptSetBool(tdoc, TidyXhtmlOut, yes);// Convert to XHTML if (bOk) { iRet = tidySetErrorBuffer(tdoc, &errbuf); // Capture diagnostics } else { throw (-1); } if (iRet >= 0) { iRet = tidySetCharEncoding(tdoc,"utf8"); //Ensure dealing with gb2312 successfully } else { throw (-1); } if (iRet >= 0) { string htmlsrc = pcSourcePage; iRet = tidyParseString (tdoc, htmlsrc.c_str() ); // Parse the input } else { throw (-1); } if (iRet >= 0) { iRet = tidyCleanAndRepair(tdoc); //Tidy it up! } else { throw (-1); } if (iRet >= 0) { iRet = tidyRunDiagnostics(tdoc); //Kvetch } else { throw (-1); } if(iRet > 1) // If error, force output. { iRet = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? iRet : -1 ); } else if (iRet < 0) { throw (-1); } if (iRet >= 0) { // Pretty Print iBufSize = 1024 * 1024 * 5; uiBufLen = iBufSize; pBuffer = new char [iBufSize]; memset(pBuffer, '\0', iBufSize); iRet = tidySaveString(tdoc, pBuffer, &uiBufLen); } else { throw (-1); } if (iRet >= 0) { sDestPage = pBuffer; } else if (iRet == -ENOMEM) { //pBuffer 长度不够 //cerr << "TidyHtml pBuffer长度不够!" << endl; throw (-1); } else { throw (-1); } } catch(exception &err) { //cerr << "TidyHtml HtmlTidy修补页面失败! " << err.what() << endl; iReturn = -1; } catch(int iThrow) { if (iThrow < 0) { //cerr << "TidyHtml HtmlTidy修补页面失败!" << endl; } iReturn = iThrow; } catch(...) { //cerr << "TidyHtml HtmlTidy修补页面失败!" 
<< endl; iReturn = -1; } tidyBufFree(&errbuf); tidyRelease(tdoc); if (pBuffer != NULL) { delete [] pBuffer; pBuffer = NULL; } return iReturn; }
// Destructor: passes m_tidyDoc to tidyRelease() first, then frees the
// m_errorOutput buffer with tidyBufFree().
HtmlTidy::~HtmlTidy() { tidyRelease(m_tidyDoc); tidyBufFree(&m_errorOutput); }
/*
 * parseString(text[, options]) -> str
 *
 * Runs HTML Tidy over `text` and returns the cleaned document as a string.
 * `options`, when given, must be a dict mapping Tidy option names to
 * values; every entry is applied to the document before parsing.
 *
 * Returns NULL with a Python exception set when the arguments are invalid
 * or an option name is unknown.
 *
 * The local names are deliberately unchanged: the PY_TO_TIDY and
 * TDOC_RETURN macros are defined elsewhere in this file and may refer to
 * them.
 */
static PyObject *parseString(PyObject *self, PyObject *args)
{
    char *cp;
    int i, len, list_size;
    TidyDoc tdoc;
    TidyOption option = TidyUnknownOption;
    PyObject *res = NULL, *arglist = NULL;
    PyObject *key_list = NULL, *item = NULL, *value = NULL;
    TidyBuffer output = {0};
    TidyBuffer errbuf = {0};

    if (!PyArg_ParseTuple(args, "s#|O", &cp, &len, &arglist))
        return NULL;

    if (arglist && !PyDict_Check(arglist)) {
        PyErr_SetString(PyExc_TypeError, "Second argument must be a dictionary!");
        return NULL;
    }

    tdoc = tidyCreate();
    tidySetErrorBuffer(tdoc, &errbuf);

    if (!arglist)
        goto im_so_lazy; /* no args provided */

    /* PyDict_Keys returns a NEW reference (or NULL on failure). */
    key_list = PyDict_Keys(arglist);
    if (!key_list) {
        /* Exception already set by PyDict_Keys; undo what we created. */
        if (errbuf.bp)
            tidyBufFree(&errbuf);
        tidyRelease(tdoc);
        return NULL;
    }

    list_size = PyList_Size(key_list);
    for (i = 0; i < list_size; i++) {
        /* Both are borrowed references; pin them while they are in use. */
        item = PyList_GetItem(key_list, i);
        value = PyDict_GetItem(arglist, item);
        Py_INCREF(item);
        Py_INCREF(value);

        option = tidyGetOptionByName(tdoc, PyString_AsString(item));
        if (option == TidyUnknownOption) {
            PyErr_Format(PyExc_KeyError, "Unknown tidy option '%s'", PyString_AsString(item));
            TDOC_RETURN();
        }

        /* Convert the Python value according to the option's Tidy type. */
        switch (tidyOptGetType(option)) {
        case TidyString:
            PY_TO_TIDY(String_Check, Value, String_AsString, "a String");
            break;
        case TidyInteger:
            PY_TO_TIDY(Int_Check, Int, Int_AsLong, "an Integer");
            break;
        case TidyBoolean:
            PY_TO_TIDY(Int_Check, Bool, Int_AsLong, "a Boolean or an Integer");
            break;
        default:
            {
                PyErr_Format(PyExc_RuntimeError, "Something strange happened, there is no option type %d", tidyOptGetType(option));
                TDOC_RETURN();
            }
        }

        Py_DECREF(item);
        Py_DECREF(value);
    }

    /* Release the key list; it was previously leaked on every call that
     * supplied an options dict.
     * NOTE(review): the TDOC_RETURN error exits above are not visible here;
     * confirm whether that macro also releases key_list. */
    Py_DECREF(key_list);
    key_list = NULL;

im_so_lazy:
    tidyParseString(tdoc, cp);
    tidyCleanAndRepair(tdoc);
    tidySaveBuffer(tdoc, &output);

    res = Py_BuildValue("s#", output.bp, output.size);

    tidyBufFree(&output);
    tidyBufFree(&errbuf);
    tidyRelease(tdoc);
    return res;
}
/*
 * fixup(text[, encoding]) -> (xhtml, errors)
 *
 * Runs HTML Tidy over `text` and returns a 2-tuple of strings: the
 * document as XHTML and the diagnostics Tidy wrote to its error buffer.
 * When `encoding` is given it is used for both input and output;
 * otherwise Tidy's default input encoding is kept and output is UTF-8.
 *
 * Returns NULL with IOError set if any Tidy stage reports a failure
 * (negative status), or with the pending exception if building the result
 * strings fails.
 */
static PyObject* elementtidy_fixup(PyObject* self, PyObject* args)
{
    int rc;
    TidyDoc doc;
    TidyBuffer out = {0};
    TidyBuffer err = {0};
    PyObject* pyout;
    PyObject* pyerr;

    char* text;
    char* encoding = NULL;
    if (!PyArg_ParseTuple(args, "s|s:fixup", &text, &encoding))
        return NULL;

    doc = tidyCreate();

    /* options for nice XHTML output */
    if (encoding)
        /* if an encoding is given, use it for both input and output */
        tidyOptSetValue(doc, TidyCharEncoding, encoding);
    else
        /* if no encoding is given, use default input and utf-8 output */
        tidyOptSetValue(doc, TidyOutCharEncoding, "utf8");
    tidyOptSetBool(doc, TidyForceOutput, yes);
    tidyOptSetInt(doc, TidyWrapLen, 0);
    tidyOptSetBool(doc, TidyQuiet, yes);
    tidyOptSetBool(doc, TidyXhtmlOut, yes);
    tidyOptSetBool(doc, TidyXmlDecl, yes);
    tidyOptSetInt(doc, TidyIndentContent, 0);
    tidyOptSetBool(doc, TidyNumEntities, yes);

    rc = tidySetErrorBuffer(doc, &err);
    if (rc < 0) {
        PyErr_SetString(PyExc_IOError, "tidySetErrorBuffer failed");
        goto error;
    }

    rc = tidyParseString(doc, text);
    if (rc < 0) {
        PyErr_SetString(PyExc_IOError, "tidyParseString failed");
        goto error;
    }

    rc = tidyCleanAndRepair(doc);
    if (rc < 0) {
        PyErr_SetString(PyExc_IOError, "tidyCleanAndRepair failed");
        goto error;
    }

    rc = tidyRunDiagnostics(doc);
    if (rc < 0) {
        PyErr_SetString(PyExc_IOError, "tidyRunDiagnostics failed");
        goto error;
    }

    rc = tidySaveBuffer(doc, &out);
    if (rc < 0) {
        /* Name the stage that actually failed (the message used to be a
         * copy of the tidyRunDiagnostics one). */
        PyErr_SetString(PyExc_IOError, "tidySaveBuffer failed");
        goto error;
    }

    /* TidyBuffer stores bytes; cast so both ternary arms are char*. An
     * unused buffer has a NULL bp, which maps to the empty string. */
    pyout = PyString_FromString(out.bp ? (char*) out.bp : "");
    if (!pyout)
        goto error;
    pyerr = PyString_FromString(err.bp ? (char*) err.bp : "");
    if (!pyerr) {
        Py_DECREF(pyout);
        goto error;
    }

    tidyBufFree(&out);
    tidyBufFree(&err);
    tidyRelease(doc);

    /* "N" hands both references over to the tuple without an extra INCREF. */
    return Py_BuildValue("NN", pyout, pyerr);

error:
    tidyBufFree(&out);
    tidyBufFree(&err);
    tidyRelease(doc);

    return NULL;
}
// Destructor: passes the member `handle` to tidyRelease().
HTMLTidy::~HTMLTidy() { tidyRelease( handle ); }
bool nuiHTML::Load(nglIStream& rStream, nglTextEncoding OverrideContentsEncoding, const nglString& rSourceURL) { if (!rSourceURL.IsEmpty()) SetSourceURL(rSourceURL); int res = -1; nglTextEncoding encoding = eUTF8; TidyDoc tdoc = NULL; { HTMLStream strm(rStream); tdoc = tidyCreate(); tidyOptSetBool(tdoc, TidyShowMarkup, no); tidyOptSetBool(tdoc, TidyShowWarnings, no); tidyOptSetInt(tdoc, TidyShowErrors, 0); tidyOptSetBool(tdoc, TidyQuiet, yes); tidySetCharEncoding(tdoc, "utf8"); TidyInputSource source; tidyInitSource( &source, &strm, &HTMLStream::TidyGetByte, &HTMLStream::TidyUngetByte, &HTMLStream::TidyEOF); res = tidyParseSource(tdoc, &source); if ( res >= 0 ) res = tidyCleanAndRepair(tdoc); // Tidy it up! if ( res >= 0 ) res = tidyRunDiagnostics(tdoc); // Kvetch if (OverrideContentsEncoding == eEncodingUnknown) { nglString encoding_string(GetEncodingString(tidyGetRoot(tdoc))); //ascii, latin1, raw, utf8, iso2022, mac, win1252, utf16le, utf16be, utf16, big5 shiftjis encoding = nuiGetTextEncodingFromString(encoding_string); } else { encoding = OverrideContentsEncoding; } } char* pStr = NULL; if (encoding != eUTF8) { // Release the doc to create a new one tidyRelease(tdoc); nglOMemory omem; rStream.SetPos(0, eStreamFromStart); rStream.PipeTo(omem); nglString decoded; decoded.Import(omem.GetBufferData(), omem.GetSize(), encoding); pStr = decoded.Export(eUTF8); nglIMemory imem(pStr, strlen(pStr)); HTMLStream strm(imem); tdoc = tidyCreate(); tidySetCharEncoding(tdoc, "utf8"); TidyInputSource source; tidyInitSource( &source, &strm, &HTMLStream::TidyGetByte, &HTMLStream::TidyUngetByte, &HTMLStream::TidyEOF); res = tidyParseSource(tdoc, &source); if ( res >= 0 ) res = tidyCleanAndRepair(tdoc); // Tidy it up! if ( res >= 0 ) res = tidyRunDiagnostics(tdoc); // Kvetch } BuildTree(tdoc, tidyGetRoot(tdoc), eUTF8, mComputeStyle); tidyRelease(tdoc); if (pStr) free(pStr); return res < 2; }