int main(int argc, char **argv ) { CURL *curl; char curl_errbuf[CURL_ERROR_SIZE]; TidyDoc tdoc; TidyBuffer docbuf = {0}; TidyBuffer tidy_errbuf = {0}; int err; if ( argc == 2) { curl = curl_easy_init(); curl_easy_setopt(curl, CURLOPT_URL, argv[1]); curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf); curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb); tdoc = tidyCreate(); tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */ tidyOptSetInt(tdoc, TidyWrapLen, 4096); tidySetErrorBuffer( tdoc, &tidy_errbuf ); tidyBufInit(&docbuf); curl_easy_setopt(curl, CURLOPT_WRITEDATA, &docbuf); err=curl_easy_perform(curl); if ( !err ) { err = tidyParseBuffer(tdoc, &docbuf); /* parse the input */ if ( err >= 0 ) { err = tidyCleanAndRepair(tdoc); /* fix any problems */ if ( err >= 0 ) { err = tidyRunDiagnostics(tdoc); /* load tidy error buffer */ if ( err >= 0 ) { dumpNode( tdoc, tidyGetRoot(tdoc), 0 ); /* walk the tree */ fprintf(stderr, "%s\n", tidy_errbuf.bp); /* show errors */ } } } } else fprintf(stderr, "%s\n", curl_errbuf); /* clean-up */ curl_easy_cleanup(curl); tidyBufFree(&docbuf); tidyBufFree(&tidy_errbuf); tidyRelease(tdoc); return(err); } else printf( "usage: %s <url>\n", argv[0] ); return(0); }
/*
 * Lua binding: takes the tidy object at stack index 1 and, when its document
 * has a root node, pushes a new table that traverseNodes() fills from that
 * root.
 *
 * Returns the number of Lua results: 1 (the table) or 0 when there is no
 * root node.
 */
int lua_tidy_toTable(lua_State *L)
{
    pTidy self = toTidy(L, 1);
    TidyNode root = tidyGetRoot(self->tdoc);

    if (root) {
        lua_newtable(L);
        traverseNodes(self->tdoc, L, root);
        return 1;
    }

    return 0;
}
/*
 * Shared implementation for the accessors that return one of a document's
 * well-known nodes (root, html, head or body, selected by node_type).
 *
 * Sets the return value to NULL when node_type is not one of those four or
 * when Tidy has no such node; otherwise instantiates a tidy_ce_node object
 * that shares the document of the calling object and bumps that document's
 * reference count.
 */
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes node_type)
{
	PHPTidyObj *wrapper;
	TidyNode target = NULL;
	TIDY_FETCH_OBJECT;

	switch (node_type) {
		case is_root_node:
			target = tidyGetRoot(obj->ptdoc->doc);
			break;

		case is_html_node:
			target = tidyGetHtml(obj->ptdoc->doc);
			break;

		case is_head_node:
			target = tidyGetHead(obj->ptdoc->doc);
			break;

		case is_body_node:
			target = tidyGetBody(obj->ptdoc->doc);
			break;

		default:
			/* unknown kind: target stays NULL and is rejected below */
			break;
	}

	if (!target) {
		RETURN_NULL();
	}

	tidy_instanciate(tidy_ce_node, return_value);
	wrapper = Z_TIDY_P(return_value);

	wrapper->type = is_node;
	wrapper->ptdoc = obj->ptdoc;
	wrapper->node = target;
	wrapper->ptdoc->ref_count++;

	tidy_add_default_properties(wrapper, is_node);
}
void parse_urls(const char *filename, const url_list_t *elem) { TidyDoc tdoc; int err; FILE *outfile = NULL; tdoc = tidyCreate(); tidyOptSetBool(tdoc, TidyForceOutput, yes); tidyOptSetBool(tdoc, TidyMark, no); tidyOptSetBool(tdoc, TidyHideEndTags, yes); tidyOptSetBool(tdoc, TidyDropEmptyParas, no); tidyOptSetBool(tdoc, TidyJoinStyles, no); tidyOptSetBool(tdoc, TidyPreserveEntities, yes); tidyOptSetInt(tdoc, TidyMergeDivs, no); tidyOptSetInt(tdoc, TidyMergeSpans, no); tidyOptSetInt(tdoc, TidyWrapLen, 4096); tidyOptSetValue(tdoc, TidyCharEncoding, "utf8"); tidySetReportFilter(tdoc, filter_cb); err = tidyParseFile(tdoc, filename); if (err >= 0) err = tidyCleanAndRepair(tdoc); if (err >= 0) { outfile = option_values.save_relative_links && !option_values.disable_save_tree ? fopen(filename, "w") : NULL; parse_html(tdoc, tidyGetRoot(tdoc), elem, 1, outfile); if (outfile) fclose(outfile); } tidyRelease(tdoc); }
bool nuiHTML::Load(nglIStream& rStream, nglTextEncoding OverrideContentsEncoding, const nglString& rSourceURL) { if (!rSourceURL.IsEmpty()) SetSourceURL(rSourceURL); int res = -1; nglTextEncoding encoding = eUTF8; TidyDoc tdoc = NULL; { HTMLStream strm(rStream); tdoc = tidyCreate(); tidyOptSetBool(tdoc, TidyShowMarkup, no); tidyOptSetBool(tdoc, TidyShowWarnings, no); tidyOptSetInt(tdoc, TidyShowErrors, 0); tidyOptSetBool(tdoc, TidyQuiet, yes); tidySetCharEncoding(tdoc, "utf8"); TidyInputSource source; tidyInitSource( &source, &strm, &HTMLStream::TidyGetByte, &HTMLStream::TidyUngetByte, &HTMLStream::TidyEOF); res = tidyParseSource(tdoc, &source); if ( res >= 0 ) res = tidyCleanAndRepair(tdoc); // Tidy it up! if ( res >= 0 ) res = tidyRunDiagnostics(tdoc); // Kvetch if (OverrideContentsEncoding == eEncodingUnknown) { nglString encoding_string(GetEncodingString(tidyGetRoot(tdoc))); //ascii, latin1, raw, utf8, iso2022, mac, win1252, utf16le, utf16be, utf16, big5 shiftjis encoding = nuiGetTextEncodingFromString(encoding_string); } else { encoding = OverrideContentsEncoding; } } char* pStr = NULL; if (encoding != eUTF8) { // Release the doc to create a new one tidyRelease(tdoc); nglOMemory omem; rStream.SetPos(0, eStreamFromStart); rStream.PipeTo(omem); nglString decoded; decoded.Import(omem.GetBufferData(), omem.GetSize(), encoding); pStr = decoded.Export(eUTF8); nglIMemory imem(pStr, strlen(pStr)); HTMLStream strm(imem); tdoc = tidyCreate(); tidySetCharEncoding(tdoc, "utf8"); TidyInputSource source; tidyInitSource( &source, &strm, &HTMLStream::TidyGetByte, &HTMLStream::TidyUngetByte, &HTMLStream::TidyEOF); res = tidyParseSource(tdoc, &source); if ( res >= 0 ) res = tidyCleanAndRepair(tdoc); // Tidy it up! if ( res >= 0 ) res = tidyRunDiagnostics(tdoc); // Kvetch } BuildTree(tdoc, tidyGetRoot(tdoc), eUTF8, mComputeStyle); tidyRelease(tdoc); if (pStr) free(pStr); return res < 2; }
static void traverseTidy(void) { traverseNode(tidyGetRoot(tdoc), 0); }
// returns the root node object. int lua_tidy_getRootNode ( lua_State *L) { pTidy t = toTidy(L,1); TidyNode tn = tidyGetRoot(t->tdoc); return push_node(L, tn, t); }
/**
 * Parse an HTML flight table and merge its rows into listFlight.
 *
 * The HTML in strHtmlData is parsed with Tidy (encoding "raw"). The node
 * located by FindNode() with attribute class="CA_table mt_10 clear" is treated
 * as the flight table: its first child row is skipped as the header, and the
 * first six cells of every other row are decoded by position (validity flag,
 * date/flight number, departure time, airports, sale end date/time,
 * price/remaining seats).
 *
 * For each row:
 *  - if __findCaFlight() reports a matching entry already in listFlight, that
 *    entry's price and seat count may be updated in place and no new entry is
 *    added;
 *  - otherwise, when the row is valid, a new SCaLowPriceFlightDetail is
 *    allocated with `new` and appended to listFlight. The caller owns these
 *    objects and must delete them.
 *
 * @param listFlight           in/out list of flight details.
 * @param strHtmlData          HTML text to parse.
 * @param straDCode            departure code used as the default departure
 *                             airport code.
 * @param straACode            arrival code used as the default arrival
 *                             airport code.
 * @param pLowPriceFlightInfo  supplies iProductType and iProductId for new
 *                             entries; dereferenced without a null check.
 * @return always -1 (the literal -1.0 converted to int).
 */
int CCaHtmlParse::ParseCaHtmlFlights(std::list<SCaLowPriceFlightDetail*> & listFlight, const std::string& strHtmlData, const CStringA & straDCode, const CStringA & straACode, const SCaLowPriceFlightInfo* pLowPriceFlightInfo)
{
	TidyDoc doc = tidyCreate();
	tidySetCharEncoding(doc,"raw");
	// NOTE(review): the parse status is not checked.
	tidyParseString(doc,strHtmlData.c_str());
	TidyNode tnRoot = tidyGetRoot(doc);
	TidyNode tFlightTab;
	TidyNode tdChild;
	int nIndexTd = 0;
	// Reference "now", captured once and used for every row.
	CTime tCurrent = CTime::GetCurrentTime();
	SCaLowPriceFlightDetail *pfindFlight = NULL;
	if (FindNode(tnRoot,"class","CA_table mt_10 clear",tFlightTab))
	{
		// Loop over the settlement prices: each child node under tblPolicy is one settlement-price record.
		TidyNode trFlight;
		int nIndexTr = 0;
		BOOL bValid = FALSE;
		CStringA straDPortCode = straDCode;
		CStringA straAPortCode = straACode;
		CStringA straFlightNo("");
		CStringA straFlightStartDate("");
		CStringA straSaleEndDate("");
		CStringA straSaleEndTime("");
		CStringA straFlightStartTime("");
		UINT uPrice = 0;
		UINT uRemainTicket = 0;
		for ( trFlight = tidyGetChild(tFlightTab); trFlight; trFlight = tidyGetNext(trFlight) )
		{
			if (0 == nIndexTr)// skip the table header
			{
				nIndexTr++;
				continue;
			}
			// Reset the per-row state. The airport codes are deliberately not
			// reset here, so they keep the value from the previous row unless
			// case 3 below overwrites them.
			nIndexTd = 0;
			bValid = FALSE;
			straFlightNo = "";
			straFlightStartDate = "";
			straSaleEndDate = "";
			straSaleEndTime = "";
			straFlightStartTime = "";
			uPrice = 0;
			uRemainTicket = 0;
			// Decode the cells by position; cells past index 5 are ignored.
			for ( tdChild = tidyGetChild(trFlight); tdChild; tdChild = tidyGetNext(tdChild) )
			{
				switch(nIndexTd)
				{
				case 0:
					{
						// Selection column: whether it is disabled
						bValid = __IsFlightValid(tdChild);
						TRACE(_T("Flight valid:%d-"), bValid);
					}
					break;
				case 1:
					{
						// Date / flight number
						__GetFlightNoAndFlightStartDate(straFlightNo, straFlightStartDate, doc, tdChild);
						TRACE("date:%s, no:%s-", straFlightStartDate, straFlightNo);
					}
					break;
				case 2:
					{
						// Departure / arrival time
						__GetFlightStartTime(straFlightStartTime, doc, tdChild);
					}
					break;
				case 3:
					{
						// Airport
						if (__IsTwoAirPort(straDCode, straACode))
						{
							__GetAirPortCode(straDPortCode, straAPortCode, doc, tdChild);
							// Fall back to the caller-supplied codes when the cell yields nothing.
							if(straDPortCode.IsEmpty())
								straDPortCode = straDCode;
							if(straAPortCode.IsEmpty())
								straAPortCode = straACode;
							TRACE("%s->%s-", straDPortCode, straAPortCode);
						}
					}
					break;
				case 4:
					{
						// Sale end date and time
						__GetSaleEndDate(straSaleEndDate, straSaleEndTime, doc, tdChild);
						TRACE("sale end date:%s, %s-", straSaleEndDate, straSaleEndTime);
					}
					break;
				case 5:
					{
						// Group-buy price
						__GetPriceAndRamainTicket(&uPrice, &uRemainTicket, doc, tdChild);
						TRACE("price:%d, remain %d seats", uPrice, uRemainTicket);
					}
					break;
				}
				nIndexTd++;
			}
			TRACE(_T("\r\n"));

			// Flights after the cut-off date are not fetched.
			// Get the departure date (the 2014-12-12 values are the defaults
			// handed to GetYearMonthDay).
			int nFlightStartYear = 2014;
			int nFlightStartMonth = 12;
			int nFlightStartDay = 12;
			GetYearMonthDay(straFlightStartDate, &nFlightStartYear, &nFlightStartMonth, &nFlightStartDay);
			// tStart is only referenced by the disabled filter below.
			CTime tStart(nFlightStartYear, nFlightStartMonth, nFlightStartDay, 0, 0, 0);
			// Disabled filters, kept for reference:
			//if (!m_bGetAllCaTuanFlight)
			//{
			//	if (tStart > m_tGetEndTime)
			//		continue;
			//}
			//
			//double d6 = pLowPriceFlightInfo->iMinHangPrice * 0.6;
			//UINT u6 = (UINT)d6;
			//// Ordinary group-buy above a 60% fare is charged for refund/change/endorsement
			//// (low-price applications are not restricted), so it is not listed.
			//if (uPrice > d6 && CA_TUAN_PRODUCT == pLowPriceFlightInfo->iProductType)
			//{
			//	bValid = FALSE;
			//	uRemainTicket = 0;
			//	continue;
			//}

			// For flights with the same date, time and flight number, keep only the lowest price.
			BOOL bFind = __findCaFlight(&pfindFlight, straFlightStartDate, straDPortCode, straAPortCode, straFlightNo, listFlight);
			if (bFind)
			{
				int nCurPrice = (int)uPrice;
				// The row just parsed is cheaper than the one parsed earlier.
				if(pfindFlight->nPrice > nCurPrice)
				{
					if (uRemainTicket > m_nMinTicketWarnNum)
					{
						// When the current row has enough tickets, replace the earlier seat count (and price) with the current ones.
						pfindFlight->nRemainSeat = uRemainTicket;
						pfindFlight->nPrice = nCurPrice;
						pfindFlight = NULL;
					}
				}
				else //(pfindFlight->nPrice <= nCurPrice)
				{
					// The earlier entry is not more expensive, but it is replaced
					// anyway when its seat count is at or below the warning threshold.
					if(pfindFlight->nRemainSeat <= m_nMinTicketWarnNum)
					{
						pfindFlight->nRemainSeat = uRemainTicket;
						pfindFlight->nPrice = nCurPrice;
						pfindFlight = NULL;
					}
				}
				// A matched row never creates a new entry, whether or not it is valid.
				continue;
			}

			// Save the parsed flight information; the caller is responsible for freeing the memory.
			if (bValid)
			{
				SCaLowPriceFlightDetail* pDetail = new SCaLowPriceFlightDetail;
				pDetail->straCompany = "CA";
				pDetail->straFromCityCode = straDPortCode;
				pDetail->straToCityCode = straAPortCode;
				pDetail->straFlightNo = straFlightNo;
				pDetail->straFromDate = straFlightStartDate;

				// Ctrip orders take some time to arrive and Air China closes at 16:00, so for
				// same-day tickets and tickets before 12:00 the next day the sale end time is
				// moved 30 minutes earlier.
				// Compute the sale interval.
				int nSaleEndYear = 2014;
				int nSaleEndMonth = 12;
				int nSaleEndDay = 12;
				GetYearMonthDay(straSaleEndDate, &nSaleEndYear, &nSaleEndMonth, &nSaleEndDay);
				int nSaleEndHour = 12;
				int nSaleEndMin = 0;
				GetHourMinSec(straSaleEndTime, &nSaleEndHour, &nSaleEndMin);
				CTime tSaleEndDate(nSaleEndYear, nSaleEndMonth, nSaleEndDay, nSaleEndHour, nSaleEndMin, 0);
				// Time from now until the sale end parsed from the page.
				CTimeSpan tSpan = tSaleEndDate - tCurrent;
				// end: compute the sale interval

				// Get the departure time.
				int nFlightStartHour = 12;
				int nFlightStartMin = 0;
				GetHourMinSec(straFlightStartTime, &nFlightStartHour, &nFlightStartMin);
				CTime tFlightStartTime(nFlightStartYear, nFlightStartMonth, nFlightStartDay, nFlightStartHour, nFlightStartMin, 0);
				// Noon on the departure day, used as the morning/afternoon boundary.
				CTime tTimeKey(nFlightStartYear, nFlightStartMonth, nFlightStartDay, 12, 0, 0);
				// end: get the departure time

				// For today's and tomorrow's flights that depart before 12:00 and are low-price
				// applications, the sale end time is 30 minutes before the official site's sale
				// end on the previous day.
				if ((CA_TUAN_LOW_PRICE_APPLY_PRODUT == pLowPriceFlightInfo->iProductType) && (1 == tSpan.GetDays()))// tomorrow's low-price application
				{
					if(tFlightStartTime <= tTimeKey)// low-price application departing by 12:00 tomorrow: valid until 15:25 today (Air China closes at 16:00)
					{
						pDetail->straSaleEndDate.Format("%d-%02d-%02d", tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay());
						CTime tSaleEnd(tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay(), 15, 25, 0);
						pDetail->straSaleEndTime.Format("%02d:%02d:%02d", tSaleEnd.GetHour(), tSaleEnd.GetMinute(), 0);
					}
					else// low-price application departing after 12:00 tomorrow: it can be ticketed tomorrow morning
					{
						pDetail->straSaleEndDate = straSaleEndDate;
						pDetail->straSaleEndTime.Format("%02d:%02d:%02d", nSaleEndHour, nSaleEndMin, 0);
					}
				}
				else if ((CA_TUAN_LOW_PRICE_APPLY_PRODUT == pLowPriceFlightInfo->iProductType) && (tSpan.GetDays() < 1))// today's low-price application: valid until 15:30 today (Air China closes at 16:00)
				{
					pDetail->straSaleEndDate.Format("%d-%02d-%02d", tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay());
					CTime tSaleEnd(tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay(), 15, 30, 0);
					pDetail->straSaleEndTime.Format("%02d:%02d:%02d", tSaleEnd.GetHour(), tSaleEnd.GetMinute(), 0);
				}
				else// ordinary group-buy, or a low-price application for the day after tomorrow or later
				{
					pDetail->straSaleEndDate = straSaleEndDate;
					pDetail->straSaleEndTime.Format("%02d:%02d:%02d", nSaleEndHour, nSaleEndMin, 0);
				}

				// Once the policy's sale time has passed, delete the policy
				// (done here by reporting zero remaining seats).
				GetYearMonthDay(pDetail->straSaleEndDate, &nSaleEndYear, &nSaleEndMonth, &nSaleEndDay);
				int nSaleEndSec = 0;
				GetHourMinSec(pDetail->straSaleEndTime, &nSaleEndHour, &nSaleEndMin, &nSaleEndSec);
				CTime tPolicyDeleteTime(nSaleEndYear, nSaleEndMonth, nSaleEndDay, nSaleEndHour, nSaleEndMin, nSaleEndSec);
				if (tCurrent >= tPolicyDeleteTime)
					uRemainTicket = 0;

				pDetail->nPrice = uPrice;
				pDetail->nProductId = pLowPriceFlightInfo->iProductId;
				pDetail->nRemainSeat = uRemainTicket;
				pDetail->nProductType = pLowPriceFlightInfo->iProductType;
				listFlight.push_back(pDetail);
			}
		}
	}
	tidyRelease(doc);
	// NOTE(review): the function is declared int but returns the double literal -1.0, so every call yields -1.
	return -1.0;
}
int main(int argc, char **argv ) { CURL *curl; char curl_errbuf[CURL_ERROR_SIZE]; char url[URL_BUF_SIZE]; char *username; TidyDoc tdoc; TidyBuffer docbuf = {0}; TidyBuffer tidy_errbuf = {0}; int err; if ( argc == 2) { username = argv[1]; } else { username = "******"; } WeatherData data; snprintf(url, URL_BUF_SIZE, "http://www.weatherlink.com/user/%s/index.php?view=summary&headers=0&type=2", username); curl = curl_easy_init(); curl_easy_setopt(curl, CURLOPT_URL, url); curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf); curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); curl_easy_setopt(curl, CURLOPT_VERBOSE, 0L); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb); tdoc = tidyCreate(); tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */ tidyOptSetInt(tdoc, TidyWrapLen, 4096); tidySetErrorBuffer( tdoc, &tidy_errbuf ); tidyBufInit(&docbuf); curl_easy_setopt(curl, CURLOPT_WRITEDATA, &docbuf); err=curl_easy_perform(curl); if ( !err ) { err = tidyParseBuffer(tdoc, &docbuf); /* parse the input */ if ( err >= 0 ) { err = tidyCleanAndRepair(tdoc); /* fix any problems */ if ( err >= 0 ) { dumpNode( tdoc, tidyGetRoot(tdoc), 0, &data ); /* walk the tree */ //err = tidyRunDiagnostics(tdoc); /* load tidy error buffer */ //if ( err >= 0 ) //{ //dumpNode( tdoc, tidyGetRoot(tdoc), 0 ); /* walk the tree */ // fprintf(stderr, ">> %s\n", tidy_errbuf.bp); /* show errors */ //} } } } else { fprintf(stderr, "%s\n", curl_errbuf); } printf("Outside temp: %f\n", data.outsideTemp ); printf("Outside humidity: %d\n", data.outsideHumidity ); printf("Dew Point: %f\n", data.dewPoint ); printf("Barometer: %f\n", data.barometer ); printf("Wind speed: %f\n", data.instantWindSpeed ); printf("Wind direction: %d\n", data.instantWindDirection ); printf("Average Wind: %f\n", data.avgWindSpeed_2min ); printf("Wind Gust: %f\n", data.windGust_10min); printf("rainRate: %f\n", data.rainRate ); printf("dailyRain: %f\n", data.dailyRain ); printf("lastHourRain: %f\n", data.lastHourRain ); /* 
clean-up */ curl_easy_cleanup(curl); tidyBufFree(&docbuf); //tidyBufFree(&tidy_errbuf); tidyRelease(tdoc); return(err); return(0); }