Ejemplo n.º 1
0
/*
 * Download the URL given as the single command-line argument with libcurl,
 * feed the response body to HTML Tidy and dump the resulting node tree.
 *
 * Returns 0 after printing the usage message, 1 if the curl handle could
 * not be created, and otherwise the last libcurl or tidy status obtained
 * (so a non-zero tidy status such as "warnings" is passed through).
 */
int main(int argc, char **argv )
{
  CURL *curl;
  char curl_errbuf[CURL_ERROR_SIZE];
  TidyDoc tdoc;
  TidyBuffer docbuf = {0};
  TidyBuffer tidy_errbuf = {0};
  int err;

  if ( argc != 2 ) {
    printf( "usage: %s <url>\n", argv[0] );
    return(0);
  }

  curl = curl_easy_init();
  if ( !curl ) {
    fprintf(stderr, "curl_easy_init() failed\n");
    return(1);
  }

  /* Make sure the buffer is a valid (empty) string even if libcurl never
     writes an error message into it. */
  curl_errbuf[0] = '\0';

  curl_easy_setopt(curl, CURLOPT_URL, argv[1]);
  curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf);
  curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
  curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);

  tdoc = tidyCreate();
  tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */
  tidyOptSetInt(tdoc, TidyWrapLen, 4096);
  /* Initialise both buffers so that tidyBufFree() below is valid even when
     nothing was ever written to them. */
  tidyBufInit(&tidy_errbuf);
  tidySetErrorBuffer( tdoc, &tidy_errbuf );
  tidyBufInit(&docbuf);

  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &docbuf);
  err = curl_easy_perform(curl);
  if ( err ) {
    fprintf(stderr, "%s\n",
            curl_errbuf[0] ? curl_errbuf : "curl_easy_perform() failed");
  }
  else {
    err = tidyParseBuffer(tdoc, &docbuf); /* parse the input */
    if ( err >= 0 )
      err = tidyCleanAndRepair(tdoc); /* fix any problems */
    if ( err >= 0 )
      err = tidyRunDiagnostics(tdoc); /* load tidy error buffer */
    if ( err >= 0 ) {
      dumpNode( tdoc, tidyGetRoot(tdoc), 0 ); /* walk the tree */
      if ( tidy_errbuf.bp ) /* bp stays NULL when tidy reported nothing */
        fprintf(stderr, "%s\n", tidy_errbuf.bp); /* show errors */
    }
  }

  /* clean-up */
  curl_easy_cleanup(curl);
  tidyBufFree(&docbuf);
  tidyBufFree(&tidy_errbuf);
  tidyRelease(tdoc);
  return(err);
}
Ejemplo n.º 2
0
Archivo: tdoc.c Proyecto: nuxlli/wax
/*
 * Lua binding: takes the tidy object at stack index 1, creates a new Lua
 * table and hands it, together with the document's root node, to
 * traverseNodes().
 *
 * Returns 1 (one Lua result) when the document has a root node, or 0
 * (no results) when it has none.
 */
int lua_tidy_toTable(lua_State*L)
{
    pTidy self = toTidy(L,1);
    TidyNode root = tidyGetRoot(self->tdoc);

    if (root) {
        lua_newtable(L);
        traverseNodes(self->tdoc, L, root);
        return 1;
    }

    return 0;
}
Ejemplo n.º 3
0
/*
 * Shared implementation for the "get root/html/head/body node" entry points.
 *
 * Looks up the node selected by node_type in the document of the fetched
 * tidy object. When the type is not one of the four handled cases, or the
 * document has no such node, the PHP return value is NULL. Otherwise
 * return_value is turned into a tidy node object that shares the document
 * with the original object (the document's ref_count is incremented).
 */
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes node_type)
{
	PHPTidyObj *child;
	TidyNode found;
	TIDY_FETCH_OBJECT;

	switch (node_type) {
		case is_root_node:
			found = tidyGetRoot(obj->ptdoc->doc);
			break;
		case is_html_node:
			found = tidyGetHtml(obj->ptdoc->doc);
			break;
		case is_head_node:
			found = tidyGetHead(obj->ptdoc->doc);
			break;
		case is_body_node:
			found = tidyGetBody(obj->ptdoc->doc);
			break;
		default:
			/* unknown node type: nothing to return */
			RETURN_NULL();
	}

	if (!found) {
		RETURN_NULL();
	}

	/* Wrap the node in a fresh node object stored in return_value. */
	tidy_instanciate(tidy_ce_node, return_value);
	child = Z_TIDY_P(return_value);
	child->node  = found;
	child->type  = is_node;
	child->ptdoc = obj->ptdoc;
	/* The new object holds another reference to the shared document. */
	child->ptdoc->ref_count++;

	tidy_add_default_properties(child, is_node);
}
Ejemplo n.º 4
0
Archivo: parse.c Proyecto: ASpade/mulk
/*
 * Parse the HTML file `filename` with HTML Tidy and pass its node tree to
 * parse_html() together with `elem`.
 *
 * When option_values.save_relative_links is set and
 * option_values.disable_save_tree is not, the same file is reopened for
 * writing ("w") and the stream is handed to parse_html(); otherwise NULL
 * is passed instead. Nothing is walked if tidy reports a negative status
 * while parsing or repairing.
 */
void parse_urls(const char *filename, const url_list_t *elem)
{
	FILE *out = NULL;
	int status;
	TidyDoc doc = tidyCreate();

	/* tidy configuration */
	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);
	tidyOptSetBool(doc, TidyHideEndTags, yes);
	tidyOptSetBool(doc, TidyDropEmptyParas, no);
	tidyOptSetBool(doc, TidyJoinStyles, no);
	tidyOptSetBool(doc, TidyPreserveEntities, yes);
	tidyOptSetInt(doc, TidyMergeDivs, no);
	tidyOptSetInt(doc, TidyMergeSpans, no);
	tidyOptSetInt(doc, TidyWrapLen, 4096);
	tidyOptSetValue(doc, TidyCharEncoding, "utf8");
	tidySetReportFilter(doc, filter_cb);

	status = tidyParseFile(doc, filename);
	if (status >= 0)
		status = tidyCleanAndRepair(doc);

	/* Negative status from either step: release the document and stop. */
	if (status < 0) {
		tidyRelease(doc);
		return;
	}

	if (option_values.save_relative_links && !option_values.disable_save_tree)
		out = fopen(filename, "w");

	parse_html(doc, tidyGetRoot(doc), elem, 1, out);

	if (out)
		fclose(out);

	tidyRelease(doc);
}
Ejemplo n.º 5
0
// Parses HTML read from rStream with HTML Tidy and builds this object's tree
// from the result via BuildTree().
//
// rStream                  - input stream; it is rewound and read a second
//                            time when the contents turn out not to be UTF-8.
// OverrideContentsEncoding - encoding to assume for the contents, or
//                            eEncodingUnknown to take the encoding reported by
//                            GetEncodingString() on the parsed document root.
// rSourceURL               - when non-empty, stored with SetSourceURL().
//
// Returns true only when the last tidy call finished with status 0 or 1;
// a status of 2 or any negative status yields false.
bool nuiHTML::Load(nglIStream& rStream, nglTextEncoding OverrideContentsEncoding, const nglString& rSourceURL)
{
  if (!rSourceURL.IsEmpty())
    SetSourceURL(rSourceURL);
  
  int res = -1;
  nglTextEncoding encoding = eUTF8;
  TidyDoc tdoc = NULL;
  {
    // First pass: parse the raw stream as utf8, mainly to find the encoding.
    HTMLStream strm(rStream);
    tdoc = tidyCreate();
    tidyOptSetBool(tdoc, TidyShowMarkup, no);
    tidyOptSetBool(tdoc, TidyShowWarnings, no);
    tidyOptSetInt(tdoc, TidyShowErrors, 0);
    tidyOptSetBool(tdoc, TidyQuiet, yes);
    tidySetCharEncoding(tdoc, "utf8");
    
    TidyInputSource source;
    tidyInitSource( &source, &strm, &HTMLStream::TidyGetByte, &HTMLStream::TidyUngetByte, &HTMLStream::TidyEOF);
    res = tidyParseSource(tdoc, &source);
    
    if ( res >= 0 )
      res = tidyCleanAndRepair(tdoc);               // Tidy it up!
    if ( res >= 0 )
      res = tidyRunDiagnostics(tdoc);               // Collect diagnostics
  
    if (OverrideContentsEncoding == eEncodingUnknown)
    {
      nglString encoding_string(GetEncodingString(tidyGetRoot(tdoc)));
      
      //ascii, latin1, raw, utf8, iso2022, mac, win1252, utf16le, utf16be, utf16, big5 shiftjis
      encoding = nuiGetTextEncodingFromString(encoding_string);
    }
    else
    {
      encoding = OverrideContentsEncoding;
    }
  }
  
  char* pStr = NULL;

  if (encoding != eUTF8)
  {
    // Second pass: the contents are not UTF-8, so convert the whole stream
    // to UTF-8 in memory and parse that with a fresh tidy document.
    tidyRelease(tdoc);
    
    nglOMemory omem;
    rStream.SetPos(0, eStreamFromStart);
    rStream.PipeTo(omem);
    nglString decoded;
    decoded.Import(omem.GetBufferData(), omem.GetSize(), encoding);
    pStr = decoded.Export(eUTF8);
    nglIMemory imem(pStr, strlen(pStr));
    
    HTMLStream strm(imem);
    tdoc = tidyCreate();
    tidySetCharEncoding(tdoc, "utf8");

    TidyInputSource source;
    tidyInitSource( &source, &strm, &HTMLStream::TidyGetByte, &HTMLStream::TidyUngetByte, &HTMLStream::TidyEOF);
    res = tidyParseSource(tdoc, &source);
    if ( res >= 0 )
      res = tidyCleanAndRepair(tdoc);               // Tidy it up!
    if ( res >= 0 )
      res = tidyRunDiagnostics(tdoc);               // Collect diagnostics
  }    
    
  BuildTree(tdoc, tidyGetRoot(tdoc), eUTF8, mComputeStyle);
  
  tidyRelease(tdoc);
  
  // free(NULL) is a no-op, so no guard is needed.
  free(pStr);
  
  // A negative status means a tidy call failed outright; the previous
  // `res < 2` test reported that case as success.
  return res >= 0 && res < 2;
}
Ejemplo n.º 6
0
static void traverseTidy(void)
{
	traverseNode(tidyGetRoot(tdoc), 0);
}
Ejemplo n.º 7
0
Archivo: tdoc.c Proyecto: nuxlli/wax
// Lua binding: hands the root node of the tidy object at stack index 1 to
// push_node() and returns whatever push_node() returns.
int lua_tidy_getRootNode ( lua_State *L)
{
    pTidy self = toTidy(L,1);

    return push_node(L, tidyGetRoot(self->tdoc), self);
}
Ejemplo n.º 8
0
// Parses the flight table out of an HTML page and appends one
// SCaLowPriceFlightDetail per valid flight row to listFlight.
//
// listFlight          - in/out list; newly allocated details are pushed onto
//                       it (the caller owns and must free them), and entries
//                       already in it may have price / remaining seats updated.
// strHtmlData         - the HTML text to parse (parsed with tidy, "raw" encoding).
// straDCode/straACode - departure / arrival codes used as defaults for each row.
// pLowPriceFlightInfo - supplies iProductType and iProductId for the new details.
//
// The table is located by FindNode() with class "CA_table mt_10 clear"; its
// first child row is skipped as the header, and the first six cells of every
// other row are read in order.
//
// NOTE(review): the only return statement is `return -1.0;` in an int
// function, so the result is always -1 regardless of what was parsed.
int CCaHtmlParse::ParseCaHtmlFlights(std::list<SCaLowPriceFlightDetail*> & listFlight, const std::string& strHtmlData, const CStringA & straDCode, const CStringA & straACode, const SCaLowPriceFlightInfo*	pLowPriceFlightInfo)
{
	TidyDoc doc = tidyCreate();
	tidySetCharEncoding(doc,"raw");
	tidyParseString(doc,strHtmlData.c_str());
	TidyNode tnRoot = tidyGetRoot(doc);

	TidyNode tFlightTab;
	TidyNode tdChild;
	int nIndexTd = 0;

	CTime tCurrent = CTime::GetCurrentTime();
	SCaLowPriceFlightDetail *pfindFlight = NULL;
	if (FindNode(tnRoot,"class","CA_table mt_10 clear",tFlightTab))
	{
		// Loop over and parse the settlement prices; each child node under tblPolicy is one settlement-price record
		TidyNode trFlight;
		int nIndexTr = 0;
		BOOL bValid = FALSE;
		CStringA straDPortCode = straDCode;
		CStringA straAPortCode = straACode;
		CStringA straFlightNo("");
		CStringA straFlightStartDate("");
		CStringA straSaleEndDate("");
		CStringA straSaleEndTime("");
		CStringA straFlightStartTime("");

		UINT uPrice = 0;
		UINT uRemainTicket = 0;
		for ( trFlight = tidyGetChild(tFlightTab); trFlight; trFlight = tidyGetNext(trFlight) )
		{
			if (0 == nIndexTr)// skip the table header
			{
				nIndexTr++;
				continue;
			}

			// Reset the per-row state before reading this row's cells.
			nIndexTd = 0;
			bValid = FALSE;
			straFlightNo = "";
			straFlightStartDate = "";
			straSaleEndDate = "";
			straSaleEndTime = "";
			straFlightStartTime = "";
			uPrice = 0;
			uRemainTicket = 0;
			for ( tdChild = tidyGetChild(trFlight); tdChild; tdChild = tidyGetNext(tdChild) )
			{
				switch(nIndexTd)
				{
				case 0:
					{
						// Selection column: whether it is disabled
						bValid = __IsFlightValid(tdChild);
						TRACE(_T("Flight valid:%d-"), bValid);
						
					}
					break;
				case 1:
					{
						// Date / flight number
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						__GetFlightNoAndFlightStartDate(straFlightNo, straFlightStartDate, doc, tdChild);
						TRACE("date:%s, no:%s-", straFlightStartDate, straFlightNo);
						 //TRACE("%s\r\n", GetNodeContent(doc, tdChild));
					}
					break;
				case 2:
					{
						// Departure / arrival time
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						 //TRACE("%s\r\n", GetNodeContent(doc, tdChild));
						__GetFlightStartTime(straFlightStartTime, doc, tdChild);
					}
					break;
				case 3:
					{
						// Airport
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						 //TRACE("%s\r\n", GetNodeContent(doc, tdChild));
						if (__IsTwoAirPort(straDCode, straACode))
						{
							__GetAirPortCode(straDPortCode, straAPortCode, doc, tdChild);
							if(straDPortCode.IsEmpty())
								straDPortCode = straDCode;
							if(straAPortCode.IsEmpty())
								straAPortCode = straACode;
							TRACE("%s->%s-", straDPortCode, straAPortCode);
						}

					}
					break;
				case 4:
					{
						// Sale end date and time
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						//TRACE("%s\r\n", GetNodeContent(doc, tdChild));
						__GetSaleEndDate(straSaleEndDate, straSaleEndTime, doc, tdChild);
						TRACE("sale end date:%s, %s-", straSaleEndDate, straSaleEndTime);
					}
					break;
				case 5:
					{
						// Group-purchase price
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						//TRACE("%s\r\n", GetNodeContent(doc, tdChild));
						//CStringA straSetPrice = GetNodeContent(doc, tdChild);

						//double fSetPrice = atof(straSetPrice.GetBuffer(0));
						//straSetPrice.ReleaseBuffer();
						//tidyRelease(doc);
						//return fSetPrice;
						__GetPriceAndRamainTicket(&uPrice, &uRemainTicket, doc, tdChild);
						TRACE("price:%d, remain %d seats", uPrice, uRemainTicket);
					}
					break;
				}

				nIndexTd++;
			}
			TRACE(_T("\r\n"));

			// Flights after the cutoff date are not fetched
			// Get the departure date
			int nFlightStartYear = 2014;
			int nFlightStartMonth = 12;
			int nFlightStartDay = 12;
			GetYearMonthDay(straFlightStartDate, &nFlightStartYear, &nFlightStartMonth, &nFlightStartDay);
			
			CTime tStart(nFlightStartYear, nFlightStartMonth, nFlightStartDay, 0, 0, 0);
			//if (!m_bGetAllCaTuanFlight)
			//{
			//	if (tStart > m_tGetEndTime)
			//		continue;
			//}
			//
			
			//double d6 = pLowPriceFlightInfo->iMinHangPrice * 0.6;
			//UINT u6 = (UINT)d6;
			//// Ordinary group purchases above 60% of the fare are charged for refund/change/endorsement (low-price applications are not restricted), so they are not listed
			//if (uPrice > d6 && CA_TUAN_PRODUCT == pLowPriceFlightInfo->iProductType)
			//{
			//	bValid = FALSE;
			//	uRemainTicket = 0;
			//	continue;
			//}
			// For flights with the same date, time and flight number, keep only the lowest price
			BOOL bFind = __findCaFlight(&pfindFlight, straFlightStartDate, straDPortCode, straAPortCode, straFlightNo, listFlight);
			if (bFind)
			{
				int nCurPrice = (int)uPrice;
				// The flight just parsed is cheaper than the one parsed earlier
				if(pfindFlight->nPrice > nCurPrice)
				{
					if (uRemainTicket > m_nMinTicketWarnNum)
					{
						// When enough tickets remain on the current row, overwrite the earlier entry's seat count (and price) with the current values
						pfindFlight->nRemainSeat = uRemainTicket;
						pfindFlight->nPrice = nCurPrice;
						pfindFlight = NULL;
					}
				}
				else //(pfindFlight->nPrice <= nCurPrice)
				{
					// The earlier entry is at or below the warning threshold, so replace it with the current row's values.
					if(pfindFlight->nRemainSeat <= m_nMinTicketWarnNum)
					{
						pfindFlight->nRemainSeat = uRemainTicket;
						pfindFlight->nPrice = nCurPrice;
						pfindFlight = NULL;
					}
				}

				continue;
			}

			// Save the parsed flight information; the caller is responsible for freeing the memory
			if (bValid)
			{
				SCaLowPriceFlightDetail* pDetail = new SCaLowPriceFlightDetail;
				pDetail->straCompany = "CA";	
				pDetail->straFromCityCode = straDPortCode;	
				pDetail->straToCityCode = straAPortCode;	
				pDetail->straFlightNo = straFlightNo;		
				pDetail->straFromDate = straFlightStartDate;	
				// Ctrip orders take some time to arrive and Air China closes at 16:00, so for same-day tickets and tickets before 12:00 the next day the sale end time is moved 30 minutes earlier
				// Compute the sale interval
				int nSaleEndYear = 2014;
				int nSaleEndMonth = 12;
				int nSaleEndDay = 12;
				GetYearMonthDay(straSaleEndDate, &nSaleEndYear, &nSaleEndMonth, &nSaleEndDay);
				int nSaleEndHour = 12;
				int nSaleEndMin = 0;
				GetHourMinSec(straSaleEndTime, &nSaleEndHour, &nSaleEndMin);
				CTime tSaleEndDate(nSaleEndYear, nSaleEndMonth, nSaleEndDay, nSaleEndHour, nSaleEndMin, 0);
				CTimeSpan tSpan = tSaleEndDate - tCurrent;
				// end: compute the sale interval
				// Get the departure time
				int nFlightStartHour = 12;
				int nFlightStartMin = 0;
				GetHourMinSec(straFlightStartTime, &nFlightStartHour, &nFlightStartMin);
				CTime tFlightStartTime(nFlightStartYear, nFlightStartMonth, nFlightStartDay, nFlightStartHour, nFlightStartMin, 0);
				CTime tTimeKey(nFlightStartYear, nFlightStartMonth, nFlightStartDay, 12, 0, 0);
				// end: get the departure time
	
				// For today's and tomorrow's low-price applications departing before 12:00, the sale ends 30 minutes before the official site's sale end on the previous day
				if ((CA_TUAN_LOW_PRICE_APPLY_PRODUT == pLowPriceFlightInfo->iProductType) && (1 == tSpan.GetDays()))// tomorrow's low-price application
				{	
					if(tFlightStartTime <= tTimeKey)// low-price application departing by 12:00 tomorrow: valid until 3:25 pm today (Air China closes at 4 pm)
					{
						pDetail->straSaleEndDate.Format("%d-%02d-%02d", tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay());
						CTime tSaleEnd(tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay(), 15, 25, 0);
						pDetail->straSaleEndTime.Format("%02d:%02d:%02d", tSaleEnd.GetHour(), tSaleEnd.GetMinute(), 0);	
					}
					else// low-price application departing after 12:00 tomorrow: can be ticketed tomorrow morning
					{
						pDetail->straSaleEndDate = straSaleEndDate;
						pDetail->straSaleEndTime.Format("%02d:%02d:%02d", nSaleEndHour, nSaleEndMin, 0);
					}
				}
				else if ((CA_TUAN_LOW_PRICE_APPLY_PRODUT == pLowPriceFlightInfo->iProductType) && (tSpan.GetDays() < 1))// today's low-price application: valid until 3:30 pm today (Air China closes at 4 pm)
				{
					pDetail->straSaleEndDate.Format("%d-%02d-%02d", tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay());
					CTime tSaleEnd(tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay(), 15, 30, 0);
					pDetail->straSaleEndTime.Format("%02d:%02d:%02d", tSaleEnd.GetHour(), tSaleEnd.GetMinute(), 0);	
				}
				else// ordinary group purchase, or a low-price application for the day after tomorrow or later
				{
					pDetail->straSaleEndDate = straSaleEndDate;
					pDetail->straSaleEndTime.Format("%02d:%02d:%02d", nSaleEndHour, nSaleEndMin, 0);
				}

				// Once the policy's sale end time has passed, the policy is removed (here: remaining tickets forced to 0)
				GetYearMonthDay(pDetail->straSaleEndDate,  &nSaleEndYear, &nSaleEndMonth, &nSaleEndDay);
				int nSaleEndSec = 0;
				GetHourMinSec(pDetail->straSaleEndTime, &nSaleEndHour, &nSaleEndMin, &nSaleEndSec);
				CTime tPolicyDeleteTime(nSaleEndYear, nSaleEndMonth, nSaleEndDay, nSaleEndHour, nSaleEndMin, nSaleEndSec);
				if (tCurrent >= tPolicyDeleteTime)
					uRemainTicket = 0;

				pDetail->nPrice = uPrice;				
				pDetail->nProductId = pLowPriceFlightInfo->iProductId;			
				pDetail->nRemainSeat = uRemainTicket;	
				pDetail->nProductType = pLowPriceFlightInfo->iProductType;

				listFlight.push_back(pDetail);
			}
		}
	}

	tidyRelease(doc);	

	return -1.0;
}
Ejemplo n.º 9
0
/*
 * Fetch the weatherlink.com summary page for a user, run it through HTML
 * Tidy and let dumpNode() fill a WeatherData record from the node tree,
 * then print the collected values.
 *
 * argv[1], when given as the only argument, is the user name; otherwise a
 * built-in placeholder name is used.
 *
 * Returns 1 if the URL does not fit its buffer or the curl handle cannot be
 * created; otherwise the last libcurl or tidy status obtained.
 */
int main(int argc, char **argv )
{
   CURL *curl;
   char curl_errbuf[CURL_ERROR_SIZE];
   char url[URL_BUF_SIZE];
   const char *username;
   TidyDoc tdoc;
   TidyBuffer docbuf = {0};
   TidyBuffer tidy_errbuf = {0};
   WeatherData data = {0};   /* never print indeterminate values */
   int walked = 0;           /* set once dumpNode() has filled `data` */
   int url_len;
   int err;

   if ( argc == 2 )
   {
      username = argv[1];
   }
   else
   {
      username = "******";
   }

   url_len = snprintf(url, URL_BUF_SIZE, "http://www.weatherlink.com/user/%s/index.php?view=summary&headers=0&type=2", username);
   if ( url_len < 0 || url_len >= URL_BUF_SIZE )
   {
      /* A truncated URL would silently request the wrong page. */
      fprintf(stderr, "user name too long\n");
      return(1);
   }

   curl = curl_easy_init();
   if ( !curl )
   {
      fprintf(stderr, "curl_easy_init() failed\n");
      return(1);
   }

   /* Keep the buffer a valid (empty) string even if libcurl writes nothing. */
   curl_errbuf[0] = '\0';

   curl_easy_setopt(curl, CURLOPT_URL, url);
   curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf);
   curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L);
   curl_easy_setopt(curl, CURLOPT_VERBOSE, 0L);
   curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);

   tdoc = tidyCreate();
   tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */
   tidyOptSetInt(tdoc, TidyWrapLen, 4096);
   /* Initialise both buffers so tidyBufFree() below is valid even when
      nothing was ever written to them. */
   tidyBufInit(&tidy_errbuf);
   tidySetErrorBuffer( tdoc, &tidy_errbuf );
   tidyBufInit(&docbuf);

   curl_easy_setopt(curl, CURLOPT_WRITEDATA, &docbuf);
   err = curl_easy_perform(curl);
   if ( err )
   {
      fprintf(stderr, "%s\n",
              curl_errbuf[0] ? curl_errbuf : "curl_easy_perform() failed");
   }
   else
   {
      err = tidyParseBuffer(tdoc, &docbuf); /* parse the input */
      if ( err >= 0 )
      {
         err = tidyCleanAndRepair(tdoc); /* fix any problems */
      }
      if ( err >= 0 )
      {
         dumpNode( tdoc, tidyGetRoot(tdoc), 0, &data ); /* walk the tree */
         walked = 1;
      }
   }

   /* Only report values when the page was actually fetched and walked. */
   if ( walked )
   {
      printf("Outside temp: %f\n", data.outsideTemp );
      printf("Outside humidity: %d\n", data.outsideHumidity );
      printf("Dew Point: %f\n", data.dewPoint );
      printf("Barometer: %f\n", data.barometer );
      printf("Wind speed: %f\n", data.instantWindSpeed );
      printf("Wind direction: %d\n", data.instantWindDirection );
      printf("Average Wind: %f\n", data.avgWindSpeed_2min );
      printf("Wind Gust: %f\n", data.windGust_10min);
      printf("rainRate: %f\n", data.rainRate );
      printf("dailyRain: %f\n", data.dailyRain );
      printf("lastHourRain: %f\n", data.lastHourRain );
   }

   /* clean-up */
   curl_easy_cleanup(curl);
   tidyBufFree(&docbuf);
   tidyBufFree(&tidy_errbuf);
   tidyRelease(tdoc);
   return(err);
}