Ejemplo n.º 1
0
/*
 * Entry point: run htmltext through tidy and hand every resulting tidy
 * node to convertNode so it becomes an edbrowse node.
 * When startpage is false, tidy is told to emit the body only.
 */
void html2nodes(const char *htmltext, bool startpage)
{
	char *patched = 0;
	const char *source;

	tdoc = tidyCreate();
	if (!startpage)
		tidyOptSetInt(tdoc, TidyBodyOnly, yes);
	tidySetReportFilter(tdoc, tidyErrorHandler);

	if (cons_utf8)
		tidySetCharEncoding(tdoc, "utf8");
	else
		tidySetCharEncoding(tdoc, "latin1");

	/* parse the preprocessed copy when there is one, else the raw text */
	patched = tidyPreprocess(htmltext);
	source = patched ? patched : htmltext;
	tidyParseString(tdoc, source);
	if (patched)
		nzFree(patched);

	tidyCleanAndRepair(tdoc);

	/* at debug level 5 and above, print the tidy tree first */
	if (debugLevel >= 5) {
		traverse_tidycall = printNode;
		traverseTidy();
	}

	/* convert tidy nodes into edbrowse nodes */
	traverse_tidycall = convertNode;
	traverseTidy();

	tidyRelease(tdoc);
}				/* html2nodes */
Ejemplo n.º 2
0
Archivo: tdoc.c Proyecto: nuxlli/wax
/*
 * Lua binding: argument 1 is the tidy object, argument 2 the encoding name.
 * Pushes the integer result of tidySetCharEncoding and returns one value.
 */
int lua_tidy_SetCharEncoding(lua_State *L)
{
	pTidy self = toTidy(L, 1);
	const char *encoding = lua_tostring(L, 2);
	int status = tidySetCharEncoding(self->tdoc, encoding);

	lua_pushnumber(L, status);
	return 1;
}
Ejemplo n.º 3
0
// Runs `str` through libtidy (XHTML output, body only, no wrapping, no
// doctype) and returns the trimmed result. `ok` is set to true only when
// every tidy step succeeded and produced non-empty output; otherwise the
// trimmed input is returned. With NO_TIDY defined the input is returned
// untouched and `ok` is always true.
QString tidyHtml(QString str, bool& ok) {
#ifdef NO_TIDY
  ok = true;
  return str;
#else
  QString cleaned = str;
  ok = false;

  // The TidyBodyOnly option is set as int or as bool depending on this check.
  static bool bodyOnlyIsInt = isTidyWithIntBodyOnlyCheck();

  TidyDoc doc = tidyCreate();
  TidyBuffer outBuf;
  TidyBuffer errBuf;

  tidyBufInit(&outBuf);
  tidyBufInit(&errBuf);

  // Configure step by step; stop at the first option that is rejected.
  bool configured = tidyOptSetBool(doc, TidyXhtmlOut, yes);
  configured = configured && tidyOptSetBool(doc, TidyForceOutput, yes);
  configured = configured && tidyOptSetBool(doc, TidyMark, no);
  if (configured) {
    if (bodyOnlyIsInt)
      configured = tidyOptSetInt(doc, TidyBodyOnly, 1);
    else
      configured = tidyOptSetBool(doc, TidyBodyOnly, yes);
  }
  configured = configured && tidyOptSetInt(doc, TidyWrapLen, 0);
  configured = configured && tidyOptSetInt(doc, TidyDoctypeMode, TidyDoctypeOmit);

  // Parse, repair, diagnose and serialise; each step runs only if the
  // previous one did not fail.
  bool produced = configured;
  produced = produced && (tidySetCharEncoding(doc, "utf8") >= 0);
  produced = produced && (tidySetErrorBuffer(doc, &errBuf) >= 0);
  produced = produced && (tidyParseString(doc, str.toUtf8().data()) >= 0);
  produced = produced && (tidyCleanAndRepair(doc) >= 0);
  produced = produced && (tidyRunDiagnostics(doc) >= 0);
  produced = produced && (tidySaveBuffer(doc, &outBuf) >= 0);
  produced = produced && (outBuf.bp != 0 && outBuf.size > 0);

  if (produced) {
    cleaned = QString::fromUtf8((char*)outBuf.bp, outBuf.size);
    ok = true;
  }

#ifdef DEBUG_MARKUP
  if (errBuf.size > 0) {
    QString diagnostics = QString::fromUtf8((char*)errBuf.bp, errBuf.size);
    qDebug() << "\n[DEBUG] MARKUP, libtidy errors and warnings:\n" << diagnostics;
  }
#endif

  // Release tidy resources.
  if (outBuf.bp != 0) {
    tidyBufFree(&outBuf);
  }
  if (errBuf.bp != 0) {
    tidyBufFree(&errBuf);
  }
  tidyRelease(doc);

  return cleaned.trimmed();
#endif
}
Ejemplo n.º 4
0
// Creates the tidy document owned by this object and configures it:
// XHTML output, output forced, warnings off, quiet mode on, the
// TidyShowErrors option set to 0, and UTF-8 as the character encoding.
tidyhtml::tidyhtml()
{
    this->tdoc = tidyCreate();

    // output format
    tidyOptSetBool(this->tdoc, TidyXhtmlOut, yes);
    tidyOptSetBool(this->tdoc, TidyForceOutput, yes);

    // diagnostics
    tidyOptSetBool(this->tdoc, TidyShowWarnings, no);
    tidyOptSetBool(this->tdoc, TidyQuiet, yes);
    tidyOptSetInt(this->tdoc, TidyShowErrors, 0);

    // encoding
    tidySetCharEncoding(this->tdoc, "utf8");
}
Ejemplo n.º 5
0
// Takes HTML code and returns it converted to XHTML code.
//
// The input is handed to libtidy as UTF-8. On success the complete
// serialised document (exactly output.size bytes) is returned; on a severe
// tidy error a message is printed and an empty string is returned. In both
// cases the sequence "&Atilde;&para;" is finally replaced by "&ouml;".
//
// The tidy call sequence is (c) Charles Reitzel and Dave Raggett, see the
// package tidy.
QString tidy(QString input)
{
  TidyBuffer output = {0};
  TidyBuffer errbuf = {0};
  QString result;
  int rc = -1;
  Bool ok;

  TidyDoc tdoc = tidyCreate();                             // Initialize "document"
  ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes );          // Convert to XHTML
  if ( ok ) rc = tidySetErrorBuffer( tdoc, &errbuf );      // Capture diagnostics
  tidySetCharEncoding( tdoc, "utf8" );
  if ( rc >= 0 ) rc = tidyParseString( tdoc, input.toUtf8().constData() );  // Parse the input
  if ( rc >= 0 ) rc = tidyCleanAndRepair( tdoc );          // Tidy it up!
  if ( rc >= 0 ) rc = tidyRunDiagnostics( tdoc );          // Kvetch
  if ( rc > 1 )                                            // If error, force output.
    rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
  if ( rc >= 0 ) rc = tidySaveBuffer( tdoc, &output );     // Pretty Print
  if ( rc >= 0 )
  {
    // Build the string straight from the tidy buffer. The buffer records its
    // own length, so no manual scan or temporary copy is needed; the previous
    // malloc/snprintf copy dropped the last byte, embedded the NUL terminator
    // in the result and was never freed.
    if ( output.bp != 0 && output.size > 0 )
      result = QString::fromUtf8( (const char*)output.bp, output.size );
  }
  else
    printf( "A severe error (%d) occurred.\n", rc );
  tidyBufFree( &output );
  tidyBufFree( &errbuf );
  tidyRelease( tdoc );
  result=result.replace("&Atilde;&para;","&ouml;");
  return result;
}
Ejemplo n.º 6
0
/*
 * Parses `len` bytes of `string` into the tidy document held by `obj`.
 * When `enc` is non-NULL it is applied as the character encoding first.
 * Returns SUCCESS after updating the object's properties, or FAILURE (with
 * an E_WARNING) when the encoding is rejected or parsing fails.
 */
static int php_tidy_parse_string(PHPTidyObj *obj, char *string, uint len, char *enc)
{
	TidyBuffer input;

	if (enc != NULL && tidySetCharEncoding(obj->ptdoc->doc, enc) < 0) {
		php_error_docref(NULL, E_WARNING, "Could not set encoding '%s'", enc);
		return FAILURE;
	}

	obj->ptdoc->initialized = 1;

	/* wrap the caller's bytes in a tidy buffer */
	tidyBufInit(&input);
	tidyBufAttach(&input, (byte *) string, len);

	if (tidyParseBuffer(obj->ptdoc->doc, &input) >= 0) {
		tidy_doc_update_properties(obj);
		return SUCCESS;
	}

	/* parsing failed: surface whatever tidy wrote to the error buffer */
	php_error_docref(NULL, E_WARNING, "%s", obj->ptdoc->errbuf->bp);
	return FAILURE;
}
Ejemplo n.º 7
0
/*
 * Shared implementation of the one-shot "repair" entry points.
 *
 * Parameters (parsed from the PHP call): the input (a path when is_file is
 * set, otherwise the markup itself), an optional config value, an optional
 * encoding name and an optional use_include_path flag.
 *
 * The return value is set to the repaired markup on success and to FALSE
 * when argument parsing, file loading, the length check, parsing or
 * clean-and-repair fails.
 */
static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, zend_bool is_file)
{
	char *enc = NULL;
	size_t enc_len = 0;
	zend_bool use_include_path = 0;
	TidyDoc doc;
	TidyBuffer *errbuf;
	zend_string *data, *arg1;
	zval *config = NULL;

	if (is_file) {
		if (zend_parse_parameters(ZEND_NUM_ARGS(), "P|zsb", &arg1, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
			RETURN_FALSE;
		}
		/* in file mode `data` is a string this function must release */
		if (!(data = php_tidy_file_to_mem(ZSTR_VAL(arg1), use_include_path))) {
			RETURN_FALSE;
		}
	} else {
		if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|zsb", &arg1, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
			RETURN_FALSE;
		}
		/* in string mode `data` merely aliases the argument */
		data = arg1;
	}

	/* tidy buffers take a uint length, so reject anything larger */
	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(data))) {
		php_error_docref(NULL, E_WARNING, "Input string is too long");
		/* release the file contents here too, as the normal exit path does */
		if (is_file) {
			zend_string_release(data);
		}
		RETURN_FALSE;
	}

	doc = tidyCreate();
	errbuf = emalloc(sizeof(TidyBuffer));
	tidyBufInit(errbuf);

	if (tidySetErrorBuffer(doc, errbuf) != 0) {
		tidyBufFree(errbuf);
		efree(errbuf);
		tidyRelease(doc);
		/* NOTE(review): E_ERROR is presumably fatal and does not return here;
		 * otherwise the code below would use the released doc/errbuf - confirm. */
		php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
	}

	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);

	TIDY_SET_DEFAULT_CONFIG(doc);

	if (config) {
		TIDY_APPLY_CONFIG_ZVAL(doc, config);
	}

	if(enc_len) {
		if (tidySetCharEncoding(doc, enc) < 0) {
			php_error_docref(NULL, E_WARNING, "Could not set encoding '%s'", enc);
			/* processing continues; the return value may be overwritten below */
			RETVAL_FALSE;
		}
	}

	if (data) {
		TidyBuffer buf;

		tidyBufInit(&buf);
		tidyBufAttach(&buf, (byte *) ZSTR_VAL(data), (uint)ZSTR_LEN(data));

		if (tidyParseBuffer(doc, &buf) < 0) {
			php_error_docref(NULL, E_WARNING, "%s", errbuf->bp);
			RETVAL_FALSE;
		} else {
			if (tidyCleanAndRepair(doc) >= 0) {
				TidyBuffer output;
				tidyBufInit(&output);

				tidySaveBuffer (doc, &output);
				FIX_BUFFER(&output);
				/* return one byte less than the buffer size (none when empty) */
				RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
				tidyBufFree(&output);
			} else {
				RETVAL_FALSE;
			}
		}
	}

	if (is_file) {
		zend_string_release(data);
	}

	tidyBufFree(errbuf);
	efree(errbuf);
	tidyRelease(doc);
}
Ejemplo n.º 8
0
// Loads an HTML document from rStream and builds the node tree from it.
//
// The stream is first parsed as UTF-8. The effective encoding is either
// OverrideContentsEncoding or, when that is eEncodingUnknown, the encoding
// derived from the parsed document. If the effective encoding is not UTF-8,
// the stream is re-read from the start, converted to UTF-8 and parsed again
// in a fresh tidy document. Returns true when the last tidy status is < 2.
bool nuiHTML::Load(nglIStream& rStream, nglTextEncoding OverrideContentsEncoding, const nglString& rSourceURL)
{
  if (!rSourceURL.IsEmpty())
    SetSourceURL(rSourceURL);

  int status = -1;
  nglTextEncoding effectiveEncoding = eUTF8;
  TidyDoc doc = NULL;
  {
    // First pass: read the raw stream as if it were UTF-8.
    HTMLStream rawInput(rStream);
    doc = tidyCreate();
    tidyOptSetBool(doc, TidyShowMarkup, no);
    tidyOptSetBool(doc, TidyShowWarnings, no);
    tidyOptSetInt(doc, TidyShowErrors, 0);
    tidyOptSetBool(doc, TidyQuiet, yes);
    tidySetCharEncoding(doc, "utf8");

    TidyInputSource rawSource;
    tidyInitSource(&rawSource, &rawInput, &HTMLStream::TidyGetByte, &HTMLStream::TidyUngetByte, &HTMLStream::TidyEOF);
    status = tidyParseSource(doc, &rawSource);

    if (status >= 0)
    {
      status = tidyCleanAndRepair(doc);
      if (status >= 0)
        status = tidyRunDiagnostics(doc);
    }

    if (OverrideContentsEncoding != eEncodingUnknown)
    {
      effectiveEncoding = OverrideContentsEncoding;
    }
    else
    {
      // Possible names: ascii, latin1, raw, utf8, iso2022, mac, win1252,
      // utf16le, utf16be, utf16, big5, shiftjis
      nglString declaredEncoding(GetEncodingString(tidyGetRoot(doc)));
      effectiveEncoding = nuiGetTextEncodingFromString(declaredEncoding);
    }
  }

  char* pUtf8Copy = NULL;

  if (effectiveEncoding != eUTF8)
  {
    // Second pass: drop the first document, transcode the whole stream to
    // UTF-8 and parse the converted text.
    tidyRelease(doc);

    nglOMemory rawBytes;
    rStream.SetPos(0, eStreamFromStart);
    rStream.PipeTo(rawBytes);
    nglString text;
    text.Import(rawBytes.GetBufferData(), rawBytes.GetSize(), effectiveEncoding);
    pUtf8Copy = text.Export(eUTF8);
    nglIMemory utf8Bytes(pUtf8Copy, strlen(pUtf8Copy));

    HTMLStream utf8Input(utf8Bytes);
    doc = tidyCreate();
    tidySetCharEncoding(doc, "utf8");

    TidyInputSource utf8Source;
    tidyInitSource(&utf8Source, &utf8Input, &HTMLStream::TidyGetByte, &HTMLStream::TidyUngetByte, &HTMLStream::TidyEOF);
    status = tidyParseSource(doc, &utf8Source);
    if (status >= 0)
    {
      status = tidyCleanAndRepair(doc);
      if (status >= 0)
        status = tidyRunDiagnostics(doc);
    }
  }

  BuildTree(doc, tidyGetRoot(doc), eUTF8, mComputeStyle);

  tidyRelease(doc);

  if (pUtf8Copy)
    free(pUtf8Copy);

  return status < 2;
}
Ejemplo n.º 9
0
// Converts the HTML in `content` to XHTML with tidy, writes the result to
// "<cwd>/data/_tmp.xml", then loads that file with TinyXML and lets a
// CTiXmlProxyVistor walk it to fill m_array.
//
// Returns 0 on success and -2 when the working buffer cannot be allocated
// or the XML file cannot be loaded / has no root element.
int CProxyParse::RunFromMem( wxString content )
{
	char *pBuffer;
	// Example input page: http://www.51proxied.com/http_non_anonymous.html

	wxString data_path = wxGetCwd() + "/data/";
	wxString path1 = data_path + "_tmp.xml";

	if (!wxDirExists(data_path))
		wxMkdir(data_path);

	pBuffer = (char*)calloc(content.Length()+1, 1);
	if (!pBuffer)
		return -2;                                   // allocation failed, nothing to parse
	wxStrncpy(pBuffer, content, content.Len()+1);


	wxLogMessage("Run Tidy!");
	TidyBuffer output;
	TidyBuffer errbuf;
	int rc = -1;
	Bool ok;
	TidyDoc tdoc = tidyCreate();                     // Initialize "document"
	tidyBufInit( &output );
	tidyBufInit( &errbuf );
	tidySetCharEncoding(tdoc, "utf8");
	ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes );  // Convert to XHTML
	if ( ok )
		rc = tidySetErrorBuffer( tdoc, &errbuf );      // Capture diagnostics
	if ( rc >= 0 )
		rc = tidyParseString( tdoc, pBuffer );           // Parse the input
	if ( rc >= 0 )
		rc = tidyCleanAndRepair( tdoc );               // Tidy it up!
	if ( rc >= 0 )
		rc = tidyRunDiagnostics( tdoc );               // Kvetch
	if ( rc > 1 )                                    // If error, force output.
		rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
	if ( rc >= 0 )
		rc = tidySaveBuffer( tdoc, &output );          // Pretty Print
	if ( rc >= 0 )
	{
		// The XML step below always reads path1, so the tidied output must be
		// written in every build, not only when _DEBUG is defined.
		WriteAllToFile(path1, (char*)output.bp, output.size);
	}
	else
		wxLogError("tidyFail");

	tidyBufFree( &output );
	tidyBufFree( &errbuf );
	tidyRelease( tdoc );
	free(pBuffer);


	wxLogMessage("Fetch data!");
	// Parse the data
	TiXmlDocument doc(path1);
	if (doc.LoadFile()) 
	{
		// root
		CTiXmlProxyVistor vistor(&m_array);
		TiXmlElement *pRoot = doc.RootElement();
		if (!pRoot)
			return -2;                               // nothing to visit
		pRoot->Accept(&vistor);
	}
	else
	{
		wxLogMessage("shit");
		return -2;
	}
	return 0;
}
Ejemplo n.º 10
0
void CCFHtmlTidy::SetTidyProp(const std::string& strParam, int nNumValue, const std::string& /*strNumValue*/, const std::string& strTextValue)
{
	TidyDoc formatter = tidyImplToDoc(tidy);
	if ("axd" == strParam)
	{
		tidyOptSetBool(formatter, TidyXmlDecl, yes);
	}
	else if ("axs" == strParam)
	{
		tidyOptSetBool(formatter, TidyXmlSpace, yes);
	}
	else if ("aan" == strParam)
	{
		tidyOptSetBool(formatter, TidyAnchorAsName, yes);
	}
	else if ("axp" == strParam)
	{
		tidyOptSetBool(formatter, TidyXmlPIs, yes);
	}
	else if ("b" == strParam)
	{
		tidyOptSetBool(formatter, TidyMakeBare, yes);
	}
	else if ("c" == strParam)
	{
		tidyOptSetBool(formatter, TidyMakeClean, yes);
	}
	else if ("diu" == strParam)
	{
		tidyOptSetBool(formatter, TidyDecorateInferredUL, yes);
	}
	else if ("dep" == strParam)
	{
		tidyOptSetBool(formatter, TidyDropEmptyParas, yes);
	}
	else if ("dft" == strParam)
	{
		tidyOptSetBool(formatter, TidyDropFontTags, yes);
	}
	else if ("dpa" == strParam)
	{
		tidyOptSetBool(formatter, TidyDropPropAttrs, yes);
	}
	else if ("ebt" == strParam)
	{
		tidyOptSetBool(formatter, TidyEncloseBlockText, yes);
	}
	else if ("et" == strParam)
	{
		tidyOptSetBool(formatter, TidyEncloseBodyText, yes);
	}
	else if ("ec" == strParam)
	{
		tidyOptSetBool(formatter, TidyEscapeCdata, yes);
	}
	else if ("fb" == strParam)
	{
		tidyOptSetBool(formatter, TidyFixBackslash, yes);
	}
	else if ("fbc" == strParam)
	{
		tidyOptSetBool(formatter, TidyFixComments, yes);
	}
	else if ("fu" == strParam)
	{
		tidyOptSetBool(formatter, TidyFixUri, yes);
	}
	else if ("hc" == strParam)
	{
		tidyOptSetBool(formatter, TidyHideComments, yes);
	}
	else if ("he" == strParam)
	{
		tidyOptSetBool(formatter, TidyHideEndTags, yes);
	}
	else if ("ic" == strParam)
	{
		tidyOptSetBool(formatter, TidyIndentCdata, yes);
	}
	else if ("ix" == strParam)
	{
		tidyOptSetBool(formatter, TidyXmlTags, yes);
	}
	else if ("jc" == strParam)
	{
		tidyOptSetBool(formatter, TidyJoinClasses, yes);
	}
	else if ("js" == strParam)
	{
		tidyOptSetBool(formatter, TidyJoinStyles, yes);
	}
	else if ("la" == strParam)
	{
		tidyOptSetBool(formatter, TidyLiteralAttribs, yes);
	}
	else if ("le" == strParam)
	{
		tidyOptSetBool(formatter, TidyLogicalEmphasis, yes);
	}
	else if ("ll" == strParam)
	{
		tidyOptSetBool(formatter, TidyLowerLiterals, yes);
	}
	else if ("n" == strParam)
	{
		tidyOptSetBool(formatter, TidyNCR, yes);
	}
	else if ("ne" == strParam)
	{
		tidyOptSetBool(formatter, TidyNumEntities, yes);
	}
	else if ("oh" == strParam)
	{
		tidyOptSetBool(formatter, TidyHtmlOut, yes);
	}
	else if ("ox" == strParam)
	{
		tidyOptSetBool(formatter, TidyXhtmlOut, yes);
	}
	else if ("oxm" == strParam)
	{
		tidyOptSetBool(formatter, TidyXmlOut, yes);
	}
	else if ("pe" == strParam)
	{
		tidyOptSetBool(formatter, TidyPreserveEntities, yes);
	}
	else if ("qa" == strParam)
	{
		tidyOptSetBool(formatter, TidyQuoteAmpersand, yes);
	}
	else if ("qm" == strParam)
	{
		tidyOptSetBool(formatter, TidyQuoteMarks, yes);
	}
	else if ("qn" == strParam)
	{
		tidyOptSetBool(formatter, TidyQuoteNbsp, yes);
	}
	else if ("rc" == strParam)
	{
		tidyOptSetBool(formatter, TidyReplaceColor, yes);
	}
	else if ("ua" == strParam)
	{
		tidyOptSetBool(formatter, TidyUpperCaseAttrs, yes);
	}
	else if ("ut" == strParam)
	{
		tidyOptSetBool(formatter, TidyUpperCaseTags, yes);
	}
	else if ("wo" == strParam)
	{
		tidyOptSetBool(formatter, TidyWord2000, yes);
	}
	else if ("bbb" == strParam)
	{
		tidyOptSetBool(formatter, TidyBreakBeforeBR, yes);
	}
	else if ("ia" == strParam)
	{
		tidyOptSetBool(formatter, TidyIndentAttributes, yes);
	}
	else if ("m" == strParam)
	{
		tidyOptSetBool(formatter, TidyShowMarkup, yes);
	}
	else if ("pw" == strParam)
	{
		tidyOptSetBool(formatter, TidyPunctWrap, yes);
	}
	else if ("vs" == strParam)
	{
		tidyOptSetBool(formatter, TidyVertSpace, yes);
	}
	else if ("wa" == strParam)
	{
		tidyOptSetBool(formatter, TidyWrapAsp, yes);
	}
	else if ("wat" == strParam)
	{
		tidyOptSetBool(formatter, TidyWrapAttVals, yes);
	}
	else if ("wj" == strParam)
	{
		tidyOptSetBool(formatter, TidyWrapJste, yes);
	}
	else if ("wp" == strParam)
	{
		tidyOptSetBool(formatter, TidyWrapPhp, yes);
	}
	else if ("wsl" == strParam)
	{
		tidyOptSetBool(formatter, TidyWrapScriptlets, yes);
	}
	else if ("ws" == strParam)
	{
		tidyOptSetBool(formatter, TidyWrapSection, yes);
	}
	else if ("ac" == strParam)
	{
		tidyOptSetBool(formatter, TidyAsciiChars, yes);
	}
	else if ("sw" == strParam)
	{
		tidyOptSetBool(formatter, TidyShowWarnings, yes);
	}
	else if ("fo" == strParam)
	{
		tidyOptSetBool(formatter, TidyForceOutput, yes);
	}
	else if ("i" == strParam)
	{
		tidyOptSetInt(formatter, TidyIndentContent, abs(nNumValue - 2) % 3);
	}
	else if ("md" == strParam)
	{
		tidyOptSetInt(formatter, TidyMergeDivs, abs(nNumValue - 2) % 3);
	}
	else if ("ms" == strParam)
	{
		tidyOptSetInt(formatter, TidyMergeSpans, abs(nNumValue - 2) % 3);
	}
	else if ("sbo" == strParam)
	{
		tidyOptSetInt(formatter, TidyBodyOnly, abs(nNumValue - 2) % 3);
	}
	else if ("d" == strParam)
	{
		tidyOptSetInt(formatter, TidyDoctypeMode, nNumValue % 5);
	}
	else if ("du" == strParam)
	{
		tidyOptSetValue(formatter, TidyDoctype, strTextValue.c_str());
	}
	else if ("ra" == strParam)
	{
		tidyOptSetInt(formatter, TidyDuplicateAttrs, nNumValue % 2);
	}
	else if ("sa" == strParam)
	{
		tidyOptSetInt(formatter, TidySortAttributes, nNumValue % 2);
	}
	else if ("ce" == strParam)
	{
		tidySetCharEncoding(formatter, TY_(GetEncodingOptNameFromTidyId)(nNumValue));
	}
	else if ("ie" == strParam)
	{
		tidySetInCharEncoding(formatter, TY_(GetEncodingOptNameFromTidyId)(nNumValue));
	}
	else if ("oe" == strParam)
	{
		tidySetOutCharEncoding(formatter, TY_(GetEncodingOptNameFromTidyId)(nNumValue));
	}
	else if ("se" == strParam)
	{
		tidyOptSetInt(formatter, TidyShowErrors, nNumValue);
	}
	else if ("is" == strParam)
	{
		tidyOptSetInt(formatter, TidyIndentSpaces, nNumValue);
	}
	else if ("ts" == strParam)
	{
		tidyOptSetInt(formatter, TidyTabSize, nNumValue);
	}
	else if ("w" == strParam)
	{
		tidyOptSetInt(formatter, TidyWrapLen, nNumValue);
	}
	else if ("at" == strParam)
	{
		tidyOptSetValue(formatter, TidyAltText, strTextValue.c_str());
	}
	else if ("cp" == strParam)
	{
		tidyOptSetValue(formatter, TidyCSSPrefix, strTextValue.c_str());
	}
	else if ("nbt" == strParam)
	{
		tidyOptSetValue(formatter, TidyBlockTags, strTextValue.c_str());
	}
	else if ("net" == strParam)
	{
		tidyOptSetValue(formatter, TidyEmptyTags, strTextValue.c_str());
	}
	else if ("nit" == strParam)
	{
		tidyOptSetValue(formatter, TidyInlineTags, strTextValue.c_str());
	}
	else if ("npt" == strParam)
	{
		tidyOptSetValue(formatter, TidyPreTags, strTextValue.c_str());
	}
}
Ejemplo n.º 11
0
/*
 * Command-line driver for tidy.
 *
 * Loads configuration (compile-time config file, then either the file named
 * by the HTML_TIDY environment variable or the user config file), then walks
 * argv: arguments starting with '-' set options on the single shared tidy
 * document; every other argument is treated as an input file that is parsed,
 * cleaned, diagnosed and saved. With no file argument, stdin is processed.
 *
 * Exit status: 2 if any content errors were counted, 1 if any content
 * warnings were counted, 0 otherwise (also 0 after the help/version/config
 * listing options, which return immediately).
 */
int main( int argc, char** argv )
{
    ctmbstr prog = argv[0];
    ctmbstr cfgfil = NULL, errfil = NULL, htmlfil = NULL;
    TidyDoc tdoc = tidyCreate();
    int status = 0;

    /* totals accumulated over all processed inputs */
    uint contentErrors = 0;
    uint contentWarnings = 0;
    uint accessWarnings = 0;

    errout = stderr;  /* initialize to stderr */
    status = 0;
    
#ifdef TIDY_CONFIG_FILE
    if ( tidyFileExists( tdoc, TIDY_CONFIG_FILE) )
    {
        status = tidyLoadConfig( tdoc, TIDY_CONFIG_FILE );
        if ( status != 0 )
            fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", TIDY_CONFIG_FILE, status);
    }
#endif /* TIDY_CONFIG_FILE */

    /* look for env var "HTML_TIDY" */
    /* then for ~/.tidyrc (on platforms defining $HOME) */

    if ( (cfgfil = getenv("HTML_TIDY")) != NULL )
    {
        status = tidyLoadConfig( tdoc, cfgfil );
        if ( status != 0 )
            fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", cfgfil, status);
    }
#ifdef TIDY_USER_CONFIG_FILE
    else if ( tidyFileExists( tdoc, TIDY_USER_CONFIG_FILE) )
    {
        status = tidyLoadConfig( tdoc, TIDY_USER_CONFIG_FILE );
        if ( status != 0 )
            fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", TIDY_USER_CONFIG_FILE, status);
    }
#endif /* TIDY_USER_CONFIG_FILE */

    /* read command line */
    /* argv[1] is always the argument under inspection; consuming an
       argument means decrementing argc and advancing argv */
    while ( argc > 0 )
    {
        if (argc > 1 && argv[1][0] == '-')
        {
            /* support -foo and --foo */
            ctmbstr arg = argv[1] + 1;

            if ( strcasecmp(arg, "xml") == 0)
                tidyOptSetBool( tdoc, TidyXmlTags, yes );

            else if ( strcasecmp(arg,   "asxml") == 0 ||
                      strcasecmp(arg, "asxhtml") == 0 )
            {
                tidyOptSetBool( tdoc, TidyXhtmlOut, yes );
            }
            else if ( strcasecmp(arg,   "ashtml") == 0 )
                tidyOptSetBool( tdoc, TidyHtmlOut, yes );

            else if ( strcasecmp(arg, "indent") == 0 )
            {
                tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
                if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
                    tidyOptResetToDefault( tdoc, TidyIndentSpaces );
            }
            else if ( strcasecmp(arg, "omit") == 0 )
                tidyOptSetBool( tdoc, TidyHideEndTags, yes );

            else if ( strcasecmp(arg, "upper") == 0 )
                tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );

            else if ( strcasecmp(arg, "clean") == 0 )
                tidyOptSetBool( tdoc, TidyMakeClean, yes );

            else if ( strcasecmp(arg, "bare") == 0 )
                tidyOptSetBool( tdoc, TidyMakeBare, yes );

            /* an encoding name used directly as an option */
            else if ( strcasecmp(arg, "raw") == 0      ||
                      strcasecmp(arg, "ascii") == 0    ||
                      strcasecmp(arg, "latin0") == 0   ||
                      strcasecmp(arg, "latin1") == 0   ||
                      strcasecmp(arg, "utf8") == 0     ||
#ifndef NO_NATIVE_ISO2022_SUPPORT
                      strcasecmp(arg, "iso2022") == 0  ||
#endif
#if SUPPORT_UTF16_ENCODINGS
                      strcasecmp(arg, "utf16le") == 0  ||
                      strcasecmp(arg, "utf16be") == 0  ||
                      strcasecmp(arg, "utf16") == 0    ||
#endif
#if SUPPORT_ASIAN_ENCODINGS
                      strcasecmp(arg, "shiftjis") == 0 ||
                      strcasecmp(arg, "big5") == 0     ||
#endif
                      strcasecmp(arg, "mac") == 0      ||
                      strcasecmp(arg, "win1252") == 0  ||
                      strcasecmp(arg, "ibm858") == 0 )
            {
                tidySetCharEncoding( tdoc, arg );
            }
            else if ( strcasecmp(arg, "numeric") == 0 )
                tidyOptSetBool( tdoc, TidyNumEntities, yes );

            else if ( strcasecmp(arg, "modify") == 0 ||
                      strcasecmp(arg, "change") == 0 ||  /* obsolete */
                      strcasecmp(arg, "update") == 0 )   /* obsolete */
            {
                tidyOptSetBool( tdoc, TidyWriteBack, yes );
            }
            else if ( strcasecmp(arg, "errors") == 0 )
                tidyOptSetBool( tdoc, TidyShowMarkup, no );

            else if ( strcasecmp(arg, "quiet") == 0 )
                tidyOptSetBool( tdoc, TidyQuiet, yes );

            /* informational options: print, release the document, exit 0 */
            else if ( strcasecmp(arg, "help") == 0 ||
                      strcasecmp(arg,    "h") == 0 || *arg == '?' )
            {
                help( prog );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "xml-help") == 0)
            {
                xml_help( );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "help-config") == 0 )
            {
                optionhelp( tdoc );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "xml-config") == 0 )
            {
                XMLoptionhelp( tdoc );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            else if ( strcasecmp(arg, "show-config") == 0 )
            {
                optionvalues( tdoc );
                tidyRelease( tdoc );
                return 0; /* success */
            }
            /* options below take a value in argv[2]; it is consumed here and
               the option itself is consumed at the end of this branch */
            else if ( strcasecmp(arg, "config") == 0 )
            {
                if ( argc >= 3 )
                {
                    ctmbstr post;

                    tidyLoadConfig( tdoc, argv[2] );

                    /* Set new error output stream if setting changed */
                    post = tidyOptGetValue( tdoc, TidyErrFile );
                    if ( post && (!errfil || !samefile(errfil, post)) )
                    {
                        errfil = post;
                        errout = tidySetErrorFile( tdoc, post );
                    }

                    --argc;
                    ++argv;
                }
            }

#if SUPPORT_ASIAN_ENCODINGS
            else if ( strcasecmp(arg, "language") == 0 ||
                      strcasecmp(arg,     "lang") == 0 )
            {
                if ( argc >= 3 )
                {
                    tidyOptSetValue( tdoc, TidyLanguage, argv[2] );
                    --argc;
                    ++argv;
                }
            }
#endif

            else if ( strcasecmp(arg, "output") == 0 ||
                      strcasecmp(arg, "-output-file") == 0 ||
                      strcasecmp(arg, "o") == 0 )
            {
                if ( argc >= 3 )
                {
                    tidyOptSetValue( tdoc, TidyOutFile, argv[2] );
                    --argc;
                    ++argv;
                }
            }
            else if ( strcasecmp(arg,  "file") == 0 ||
                      strcasecmp(arg, "-file") == 0 ||
                      strcasecmp(arg,     "f") == 0 )
            {
                if ( argc >= 3 )
                {
                    errfil = argv[2];
                    errout = tidySetErrorFile( tdoc, errfil );
                    --argc;
                    ++argv;
                }
            }
            else if ( strcasecmp(arg,  "wrap") == 0 ||
                      strcasecmp(arg, "-wrap") == 0 ||
                      strcasecmp(arg,     "w") == 0 )
            {
                if ( argc >= 3 )
                {
                    /* the wrap length is set even when no number could be
                       read (it is then 0); the value argument is consumed
                       only when a number was read */
                    uint wraplen = 0;
                    int nfields = sscanf( argv[2], "%u", &wraplen );
                    tidyOptSetInt( tdoc, TidyWrapLen, wraplen );
                    if (nfields > 0)
                    {
                        --argc;
                        ++argv;
                    }
                }
            }
            else if ( strcasecmp(arg,  "version") == 0 ||
                      strcasecmp(arg, "-version") == 0 ||
                      strcasecmp(arg,        "v") == 0 )
            {
                version();
                tidyRelease( tdoc );
                return 0;  /* success */

            }
            /* generic "--option value" form */
            else if ( strncmp(argv[1], "--", 2 ) == 0)
            {
                /* NOTE(review): argv[2] is read here without an argc >= 3
                   check, unlike the branches above - confirm that
                   tidyOptParseValue tolerates a missing (NULL) value */
                if ( tidyOptParseValue(tdoc, argv[1]+2, argv[2]) )
                {
                    /* Set new error output stream if setting changed */
                    ctmbstr post = tidyOptGetValue( tdoc, TidyErrFile );
                    if ( post && (!errfil || !samefile(errfil, post)) )
                    {
                        errfil = post;
                        errout = tidySetErrorFile( tdoc, post );
                    }

                    ++argv;
                    --argc;
                }
            }

#if SUPPORT_ACCESSIBILITY_CHECKS
            else if ( strcasecmp(arg, "access") == 0 )
            {
                if ( argc >= 3 )
                {
                    uint acclvl = 0;
                    int nfields = sscanf( argv[2], "%u", &acclvl );
                    tidyOptSetInt( tdoc, TidyAccessibilityCheckLevel, acclvl );
                    if (nfields > 0)
                    {
                        --argc;
                        ++argv;
                    }
                }
            }
#endif

            else
            {
                /* anything else is treated as a cluster of single-letter
                   flags, e.g. "-icu"; each letter is handled in turn */
                uint c;
                ctmbstr s = argv[1];

                while ( (c = *++s) != '\0' )
                {
                    switch ( c )
                    {
                    case 'i':
                        tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
                        if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
                            tidyOptResetToDefault( tdoc, TidyIndentSpaces );
                        break;

                    /* Usurp -o for output file.  Anyone hiding end tags?
                    case 'o':
                        tidyOptSetBool( tdoc, TidyHideEndTags, yes );
                        break;
                    */

                    case 'u':
                        tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
                        break;

                    case 'c':
                        tidyOptSetBool( tdoc, TidyMakeClean, yes );
                        break;

                    case 'b':
                        tidyOptSetBool( tdoc, TidyMakeBare, yes );
                        break;

                    case 'n':
                        tidyOptSetBool( tdoc, TidyNumEntities, yes );
                        break;

                    case 'm':
                        tidyOptSetBool( tdoc, TidyWriteBack, yes );
                        break;

                    case 'e':
                        tidyOptSetBool( tdoc, TidyShowMarkup, no );
                        break;

                    case 'q':
                        tidyOptSetBool( tdoc, TidyQuiet, yes );
                        break;

                    default:
                        unknownOption( c );
                        break;
                    }
                }
            }

            /* consume the option itself and look at the next argument */
            --argc;
            ++argv;
            continue;
        }

        /* not an option: argv[1] (if present) names an input file,
           otherwise the input comes from stdin */
        if ( argc > 1 )
        {
            htmlfil = argv[1];
            if ( tidyOptGetBool(tdoc, TidyEmacs) )
                tidyOptSetValue( tdoc, TidyEmacsFile, htmlfil );
            status = tidyParseFile( tdoc, htmlfil );
        }
        else
        {
            htmlfil = "stdin";
            status = tidyParseStdin( tdoc );
        }

        if ( status >= 0 )
            status = tidyCleanAndRepair( tdoc );

        if ( status >= 0 )
            status = tidyRunDiagnostics( tdoc );

        if ( status > 1 ) /* If errors, do we want to force output? */
            status = ( tidyOptGetBool(tdoc, TidyForceOutput) ? status : -1 );

        /* write the result: back to the input file, to the configured
           output file, or to stdout */
        if ( status >= 0 && tidyOptGetBool(tdoc, TidyShowMarkup) )
        {
            if ( tidyOptGetBool(tdoc, TidyWriteBack) && argc > 1 )
                status = tidySaveFile( tdoc, htmlfil );
            else
            {
                ctmbstr outfil = tidyOptGetValue( tdoc, TidyOutFile );
                if ( outfil )
                    status = tidySaveFile( tdoc, outfil );
                else
                    status = tidySaveStdout( tdoc );
            }
        }

        contentErrors   += tidyErrorCount( tdoc );
        contentWarnings += tidyWarningCount( tdoc );
        accessWarnings  += tidyAccessWarningCount( tdoc );

        --argc;
        ++argv;

        /* stop once no arguments remain after the program name slot */
        if ( argc <= 1 )
            break;
    }

    if (!tidyOptGetBool(tdoc, TidyQuiet) &&
        errout == stderr && !contentErrors)
        fprintf(errout, "\n");

    if (contentErrors + contentWarnings > 0 && 
         !tidyOptGetBool(tdoc, TidyQuiet))
        tidyErrorSummary(tdoc);

    if (!tidyOptGetBool(tdoc, TidyQuiet))
        tidyGeneralInfo(tdoc);

    /* called to free hash tables etc. */
    tidyRelease( tdoc );

    /* return status can be used by scripts */
    if ( contentErrors > 0 )
        return 2;

    if ( contentWarnings > 0 )
        return 1;

    /* 0 signifies all is ok */
    return 0;
}
Ejemplo n.º 12
0
    /**
     * Handles one fetch request.
     *
     * Downloads message.fi->url with libcurl, optionally transcodes the body to
     * UTF-8 with ICU (when the request's attMap has an "encode" entry, whose value
     * is passed to ucnv_convert as the source converter name), runs the text
     * through HTML Tidy configured for XML output, and sends a FetchResultMessage
     * to `from`.
     *
     * message.fi is copied and then deleted here. The FetchResult allocated here
     * is passed to FetchResultMessage and is not deleted by this function.
     *
     * On failure result->type is fetch::ERROR and result->result holds the code
     * as decimal text:
     *   -1  curl_easy_init failed
     *   -2  curl_easy_perform failed
     *   -3  tidyParseString failed
     *   -4  tidyCleanAndRepair failed
     */
    void FetchTaskHandler(const FetchTaskMessage &message, const Theron::Address from)
        {
            std::cout<<"get data.................."<<std::endl;

            // Copy the request, then release the FetchInfo that came with the message.
            fetch::FetchInfo fi=*(message.fi);
            delete message.fi;

            fetch::FetchResult *result=new fetch::FetchResult();
            result->type=fetch::UNKNOWN;
            result->url=fi.url;
            result->pathList=fi.pathList;
            result->attMap=fi.attMap;

            int errCode=0;
            int err;
            // Must stay alive as long as the curl handle that points at it.
            char curl_errbuf[CURL_ERROR_SIZE];
            CURL *curl = curl_easy_init();
            if(curl!=NULL) {
                curl_easy_setopt(curl, CURLOPT_URL,fi.url.c_str());
                curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf);
                // These options are read as `long` through varargs, so the
                // literals must be long (an int is not promoted automatically).
                curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10L);
                curl_easy_setopt(curl, CURLOPT_NOSIGNAL,1L);
                curl_easy_setopt(curl, CURLOPT_TIMEOUT ,60L);

                curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fetch_write);

                std::stringstream iss;
                curl_easy_setopt(curl, CURLOPT_WRITEDATA, &iss);

                err=curl_easy_perform(curl);
                if ( !err )
                {
                    // Snapshot the body once; str() returns a fresh copy on every call.
                    const std::string body=iss.str();

                    // Room for 3 output bytes per input byte plus one spare byte that
                    // is never handed to ICU, so the text is always NUL-terminated
                    // (also when the body is empty or the conversion fills the buffer).
                    std::string text(body.length()*3+1, '\0');

                    std::map<std::string,std::string>::iterator efit=fi.attMap.find("encode");
                    if(efit!=fi.attMap.end())
                    {
                        // Best effort, as before: a conversion error is not reported and
                        // whatever was converted so far is passed on to tidy.
                        UErrorCode  error = U_ZERO_ERROR;
                        ucnv_convert("UTF-8",efit->second.c_str(),&text[0], body.length()*3, body.c_str(), body.length(), &error );
                    }else
                    {
                        text=body;
                    }

                    TidyDoc tdoc = tidyCreate();
                    tidyOptSetInt(tdoc, TidyWrapLen, 4096);
                    tidyOptSetBool( tdoc, TidyXmlOut, yes );
                    tidyOptSetBool(tdoc, TidyQuoteNbsp, no);
                    tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */
                    tidyOptSetBool(tdoc, TidyQuiet, yes);
                    tidyOptSetBool(tdoc, TidyShowWarnings, no);
                    tidyOptSetValue(tdoc,TidyDoctype,"omit");
                    tidyOptSetBool(tdoc, TidyFixBackslash, yes);
                    tidyOptSetBool(tdoc, TidyMark, no);
                    tidySetCharEncoding(tdoc,"utf8");

                    err = tidyParseString(tdoc, text.c_str());
                    if ( err < 0 )
                    {
                        errCode=-3;
                    }
                    else
                    {
                        err = tidyCleanAndRepair(tdoc); /* fix any problems */
                        if ( err < 0 )
                        {
                            errCode=-4;
                        }
                        else
                        {
                            TidyBuffer outbuf = {0};
                            tidyBufInit(&outbuf);
                            tidySaveBuffer( tdoc, &outbuf );
                            // bp may still be NULL if nothing was written; only read it
                            // as a C string when it points at data.
                            if(outbuf.bp!=NULL)
                            {
                                result->result=reinterpret_cast<const char*>(outbuf.bp);
                            }
                            tidyBufFree(&outbuf);
                        }
                    }
                    tidyRelease(tdoc);
                }else
                {
                    errCode=-2;
                }
            }else
            {
                errCode=-1;
            }

            if(errCode<0)
            {
                std::stringstream ess;
                ess<<errCode;
                result->type=fetch::ERROR;
                result->result=ess.str();
            }
            if(curl!=NULL)
            {
                curl_easy_cleanup(curl);
            }
            Send(FetchResultMessage(result), from);
        }
Ejemplo n.º 13
0
// Parses an HTML page with HTML Tidy and extracts flight rows from the node
// whose "class" attribute is "CA_table mt_10 clear" (located via FindNode).
//
// For every row after the first (header) row, the cells are read by position:
//   0: selectable/valid flag        1: departure date + flight number
//   2: departure time               3: airport codes (only when __IsTwoAirPort says so)
//   4: sale end date + time         5: price + remaining tickets
//
// If __findCaFlight reports that listFlight already holds a matching entry,
// that entry's price/seat count may be updated in place and no new entry is
// added. Otherwise, when the row is valid, a new SCaLowPriceFlightDetail is
// allocated and appended to listFlight; per the original author's note the
// caller is responsible for freeing those objects.
//
// Parameters:
//   listFlight           in/out list of parsed flights (searched and appended to)
//   strHtmlData          HTML text to parse
//   straDCode/straACode  default departure/arrival codes used when the row does
//                        not supply its own airport codes
//   pLowPriceFlightInfo  source of product id/type; dereferenced without a NULL check
//
// Returns: always -1 (the literal -1.0 is converted to int).
// NOTE(review): the return value carries no success/failure information - confirm
// that callers ignore it.
int CCaHtmlParse::ParseCaHtmlFlights(std::list<SCaLowPriceFlightDetail*> & listFlight, const std::string& strHtmlData, const CStringA & straDCode, const CStringA & straACode, const SCaLowPriceFlightInfo*	pLowPriceFlightInfo)
{
	TidyDoc doc = tidyCreate();
	tidySetCharEncoding(doc,"raw");
	tidyParseString(doc,strHtmlData.c_str());
	TidyNode tnRoot = tidyGetRoot(doc);

	TidyNode tFlightTab;
	TidyNode tdChild;
	int nIndexTd = 0;

	// Captured once so every row is compared against the same "now".
	CTime tCurrent = CTime::GetCurrentTime();
	SCaLowPriceFlightDetail *pfindFlight = NULL;
	if (FindNode(tnRoot,"class","CA_table mt_10 clear",tFlightTab))
	{
		// Walk the children of the matched table node; each child is treated as one
		// price record (the original note refers to the table as "tblPolicy").
		TidyNode trFlight;
		int nIndexTr = 0;
		BOOL bValid = FALSE;
		CStringA straDPortCode = straDCode;
		CStringA straAPortCode = straACode;
		CStringA straFlightNo("");
		CStringA straFlightStartDate("");
		CStringA straSaleEndDate("");
		CStringA straSaleEndTime("");
		CStringA straFlightStartTime("");

		UINT uPrice = 0;
		UINT uRemainTicket = 0;
		for ( trFlight = tidyGetChild(tFlightTab); trFlight; trFlight = tidyGetNext(trFlight) )
		{
			// Skip the table header. nIndexTr is only ever bumped here, so it acts
			// as a "first row seen" flag rather than a running row index.
			if (0 == nIndexTr)
			{
				nIndexTr++;
				continue;
			}

			// Reset the per-row fields. The airport codes are NOT reset here, so they
			// keep the value from the previous row unless column 3 overwrites them.
			nIndexTd = 0;
			bValid = FALSE;
			straFlightNo = "";
			straFlightStartDate = "";
			straSaleEndDate = "";
			straSaleEndTime = "";
			straFlightStartTime = "";
			uPrice = 0;
			uRemainTicket = 0;
			for ( tdChild = tidyGetChild(trFlight); tdChild; tdChild = tidyGetNext(tdChild) )
			{
				switch(nIndexTd)
				{
				case 0:
					{
						// Selection cell: whether the row is disabled.
						bValid = __IsFlightValid(tdChild);
						TRACE(_T("Flight valid:%d-"), bValid);
						
					}
					break;
				case 1:
					{
						// Date / flight number.
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						__GetFlightNoAndFlightStartDate(straFlightNo, straFlightStartDate, doc, tdChild);
						TRACE("date:%s, no:%s-", straFlightStartDate, straFlightNo);
						 //TRACE("%s\r\n", GetNodeContent(doc, tdChild));
					}
					break;
				case 2:
					{
						// Departure/arrival time.
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						 //TRACE("%s\r\n", GetNodeContent(doc, tdChild));
						__GetFlightStartTime(straFlightStartTime, doc, tdChild);
					}
					break;
				case 3:
					{
						// Airport.
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						 //TRACE("%s\r\n", GetNodeContent(doc, tdChild));
						if (__IsTwoAirPort(straDCode, straACode))
						{
							__GetAirPortCode(straDPortCode, straAPortCode, doc, tdChild);
							// Fall back to the codes passed in when the cell yields nothing.
							if(straDPortCode.IsEmpty())
								straDPortCode = straDCode;
							if(straAPortCode.IsEmpty())
								straAPortCode = straACode;
							TRACE("%s->%s-", straDPortCode, straAPortCode);
						}

					}
					break;
				case 4:
					{
						// Sale end date and time.
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						//TRACE("%s\r\n", GetNodeContent(doc, tdChild));
						__GetSaleEndDate(straSaleEndDate, straSaleEndTime, doc, tdChild);
						TRACE("sale end date:%s, %s-", straSaleEndDate, straSaleEndTime);
					}
					break;
				case 5:
					{
						// Group-buy price (and remaining tickets).
						//dumpNode(tdChild, 0);
						//TRACE(_T("\r\n"));
						//TRACE("%s\r\n", GetNodeContent(doc, tdChild));
						//CStringA straSetPrice = GetNodeContent(doc, tdChild);

						//double fSetPrice = atof(straSetPrice.GetBuffer(0));
						//straSetPrice.ReleaseBuffer();
						//tidyRelease(doc);
						//return fSetPrice;
						__GetPriceAndRamainTicket(&uPrice, &uRemainTicket, doc, tdChild);
						TRACE("price:%d, remain %d seats", uPrice, uRemainTicket);
					}
					break;
				}

				nIndexTd++;
			}
			TRACE(_T("\r\n"));

			// Flights after the cutoff date were meant to be skipped (check disabled below).
			// Get the departure date; the values below are the defaults handed to
			// GetYearMonthDay.
			int nFlightStartYear = 2014;
			int nFlightStartMonth = 12;
			int nFlightStartDay = 12;
			GetYearMonthDay(straFlightStartDate, &nFlightStartYear, &nFlightStartMonth, &nFlightStartDay);
			
			// NOTE(review): tStart is only referenced by the disabled cutoff check below.
			CTime tStart(nFlightStartYear, nFlightStartMonth, nFlightStartDay, 0, 0, 0);
			//if (!m_bGetAllCaTuanFlight)
			//{
			//	if (tStart > m_tGetEndTime)
			//		continue;
			//}
			//
			
			//double d6 = pLowPriceFlightInfo->iMinHangPrice * 0.6;
			//UINT u6 = (UINT)d6;
			//// Ordinary group-buy above 60% of the price charges for refund/change
			//// (low-price applications are exempt), so such flights are not listed.
			//if (uPrice > d6 && CA_TUAN_PRODUCT == pLowPriceFlightInfo->iProductType)
			//{
			//	bValid = FALSE;
			//	uRemainTicket = 0;
			//	continue;
			//}
			// For flights with the same date, time and flight number keep only the lowest price.
			BOOL bFind = __findCaFlight(&pfindFlight, straFlightStartDate, straDPortCode, straAPortCode, straFlightNo, listFlight);
			if (bFind)
			{
				int nCurPrice = (int)uPrice;
				// The row just parsed is cheaper than the entry parsed earlier.
				if(pfindFlight->nPrice > nCurPrice)
				{
					if (uRemainTicket > m_nMinTicketWarnNum)
					{
						// Enough tickets on the current row: overwrite the earlier
						// entry's seat count and price with the current ones.
						pfindFlight->nRemainSeat = uRemainTicket;
						pfindFlight->nPrice = nCurPrice;
						pfindFlight = NULL;
					}
				}
				else //(pfindFlight->nPrice <= nCurPrice)
				{
					// The earlier entry is not more expensive, but it is replaced anyway
					// when its own seat count is at or below the warning threshold.
					if(pfindFlight->nRemainSeat <= m_nMinTicketWarnNum)
					{
						pfindFlight->nRemainSeat = uRemainTicket;
						pfindFlight->nPrice = nCurPrice;
						pfindFlight = NULL;
					}
				}

				// A matching entry exists, so never append a second one for this row.
				continue;
			}

			// Store the parsed flight; the caller is responsible for freeing the memory.
			if (bValid)
			{
				SCaLowPriceFlightDetail* pDetail = new SCaLowPriceFlightDetail;
				pDetail->straCompany = "CA";	
				pDetail->straFromCityCode = straDPortCode;	
				pDetail->straToCityCode = straAPortCode;	
				pDetail->straFlightNo = straFlightNo;		
				pDetail->straFromDate = straFlightStartDate;	
				// Original note: Ctrip orders take some time to arrive and Air China closes
				// at 16:00, so for same-day tickets and tickets departing before 12:00 the
				// next day the sale end time is moved up by 30 minutes.
				// Compute the sales interval.
				int nSaleEndYear = 2014;
				int nSaleEndMonth = 12;
				int nSaleEndDay = 12;
				GetYearMonthDay(straSaleEndDate, &nSaleEndYear, &nSaleEndMonth, &nSaleEndDay);
				int nSaleEndHour = 12;
				int nSaleEndMin = 0;
				GetHourMinSec(straSaleEndTime, &nSaleEndHour, &nSaleEndMin);
				CTime tSaleEndDate(nSaleEndYear, nSaleEndMonth, nSaleEndDay, nSaleEndHour, nSaleEndMin, 0);
				CTimeSpan tSpan = tSaleEndDate - tCurrent;
				// end: sales interval
				// Get the departure time; tTimeKey is noon on the departure day.
				int nFlightStartHour = 12;
				int nFlightStartMin = 0;
				GetHourMinSec(straFlightStartTime, &nFlightStartHour, &nFlightStartMin);
				CTime tFlightStartTime(nFlightStartYear, nFlightStartMonth, nFlightStartDay, nFlightStartHour, nFlightStartMin, 0);
				CTime tTimeKey(nFlightStartYear, nFlightStartMonth, nFlightStartDay, 12, 0, 0);
				// end: departure time
	
				// Original note: for low-price applications departing today or tomorrow
				// before 12:00, the sale ends 30 minutes before the official site's sale
				// end on the previous day.
				// Branch 1: low-price application whose sale ends one day from now ("tomorrow").
				if ((CA_TUAN_LOW_PRICE_APPLY_PRODUT == pLowPriceFlightInfo->iProductType) && (1 == tSpan.GetDays()))
				{	
					// Departing at or before 12:00 tomorrow: valid until 15:25 today
					// (Air China closes at 16:00).
					if(tFlightStartTime <= tTimeKey)
					{
						pDetail->straSaleEndDate.Format("%d-%02d-%02d", tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay());
						CTime tSaleEnd(tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay(), 15, 25, 0);
						pDetail->straSaleEndTime.Format("%02d:%02d:%02d", tSaleEnd.GetHour(), tSaleEnd.GetMinute(), 0);	
					}
					// Departing after 12:00 tomorrow: tickets can still be issued tomorrow
					// morning, so keep the parsed sale end.
					else
					{
						pDetail->straSaleEndDate = straSaleEndDate;
						pDetail->straSaleEndTime.Format("%02d:%02d:%02d", nSaleEndHour, nSaleEndMin, 0);
					}
				}
				// Branch 2: low-price application whose sale ends in less than a day
				// ("today"): valid until 15:30 today (Air China closes at 16:00).
				else if ((CA_TUAN_LOW_PRICE_APPLY_PRODUT == pLowPriceFlightInfo->iProductType) && (tSpan.GetDays() < 1))
				{
					pDetail->straSaleEndDate.Format("%d-%02d-%02d", tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay());
					CTime tSaleEnd(tCurrent.GetYear(), tCurrent.GetMonth(), tCurrent.GetDay(), 15, 30, 0);
					pDetail->straSaleEndTime.Format("%02d:%02d:%02d", tSaleEnd.GetHour(), tSaleEnd.GetMinute(), 0);	
				}
				// Branch 3: ordinary group-buy, or low-price applications two or more days out.
				else
				{
					pDetail->straSaleEndDate = straSaleEndDate;
					pDetail->straSaleEndTime.Format("%02d:%02d:%02d", nSaleEndHour, nSaleEndMin, 0);
				}

				// Once the (possibly adjusted) sale end time has passed, the policy is
				// retired by reporting zero remaining tickets.
				GetYearMonthDay(pDetail->straSaleEndDate,  &nSaleEndYear, &nSaleEndMonth, &nSaleEndDay);
				int nSaleEndSec = 0;
				GetHourMinSec(pDetail->straSaleEndTime, &nSaleEndHour, &nSaleEndMin, &nSaleEndSec);
				CTime tPolicyDeleteTime(nSaleEndYear, nSaleEndMonth, nSaleEndDay, nSaleEndHour, nSaleEndMin, nSaleEndSec);
				if (tCurrent >= tPolicyDeleteTime)
					uRemainTicket = 0;

				pDetail->nPrice = uPrice;				
				pDetail->nProductId = pLowPriceFlightInfo->iProductId;			
				pDetail->nRemainSeat = uRemainTicket;	
				pDetail->nProductType = pLowPriceFlightInfo->iProductType;

				listFlight.push_back(pDetail);
			}
		}
	}

	tidyRelease(doc);	

	// NOTE(review): double literal in an int function; the value returned is -1.
	return -1.0;
}
Ejemplo n.º 14
0
// Applies one control token to a tidy document.
//
// The token occupies lpszTidy[nPos .. nPos + nSize) and has the form
//   '-' <alphabetic key> [<numeric suffix>]
// The key selects the tidy option. The suffix starts at the first non-alphabetic
// character after the '-' and is parsed with _ttoi; it is 0 when absent.
// Tokens that do not start with '-' or are shorter than 2 characters, and keys
// that are not listed below, are ignored.
//
// Parameters:
//   tdoc      tidy document whose options are set
//   lpszTidy  buffer containing the token (read at nPos .. nPos + nSize - 1)
//   nPos      index of the token's leading '-'
//   nSize     token length in characters, including the '-'
void HTidyInterface::SetTidyControl( TidyDoc tdoc, LPCTSTR lpszTidy, int nPos, int nSize )
{
	if (lpszTidy[nPos] != '-' || nSize < 2)
	{
		return;
	}

	// Offset (relative to nPos) of the first non-alphabetic character after the
	// '-'; equals nSize when the token is a bare key with no suffix.
	int nKeyEnd = nSize;
	for (int i = nPos + 1; i < nPos + nSize; ++i)
	{
		if (!_istalpha(lpszTidy[i]))
		{
			nKeyEnd = i - nPos;
			break;
		}
	}
	CString strParam(lpszTidy + nPos + 1, nKeyEnd - 1);

	int nNumValue = 0;
	if (nKeyEnd != nSize)
	{
		CString strNum(lpszTidy + nPos + nKeyEnd, nSize - nKeyEnd);
		nNumValue = _ttoi(strNum);
	}

	// Free-text options take their value from g_GlobalTidy only in HTML mode and
	// are set to the empty string otherwise.
	const bool bHtml = (m_onlyHtmlXml == SYN_HTML);
	CString strNothing;

	// --- Boolean switches: presence of the key turns the option on. ---
	if      (strParam == _T("axd")) tidyOptSetBool(tdoc, TidyXmlDecl, yes);
	else if (strParam == _T("axs")) tidyOptSetBool(tdoc, TidyXmlSpace, yes);
	else if (strParam == _T("aan")) tidyOptSetBool(tdoc, TidyAnchorAsName, yes);
	else if (strParam == _T("axp")) tidyOptSetBool(tdoc, TidyXmlPIs, yes);
	else if (strParam == _T("b"))   tidyOptSetBool(tdoc, TidyMakeBare, yes);
	else if (strParam == _T("c"))   tidyOptSetBool(tdoc, TidyMakeClean, yes);
	else if (strParam == _T("diu")) tidyOptSetBool(tdoc, TidyDecorateInferredUL, yes);
	else if (strParam == _T("dep")) tidyOptSetBool(tdoc, TidyDropEmptyParas, yes);
	else if (strParam == _T("dft")) tidyOptSetBool(tdoc, TidyDropFontTags, yes);
	else if (strParam == _T("dpa")) tidyOptSetBool(tdoc, TidyDropPropAttrs, yes);
	else if (strParam == _T("ebt")) tidyOptSetBool(tdoc, TidyEncloseBlockText, yes);
	else if (strParam == _T("et"))  tidyOptSetBool(tdoc, TidyEncloseBodyText, yes);
	else if (strParam == _T("ec"))  tidyOptSetBool(tdoc, TidyEscapeCdata, yes);
	else if (strParam == _T("fb"))  tidyOptSetBool(tdoc, TidyFixBackslash, yes);
	else if (strParam == _T("fbc")) tidyOptSetBool(tdoc, TidyFixComments, yes);
	else if (strParam == _T("fu"))  tidyOptSetBool(tdoc, TidyFixUri, yes);
	else if (strParam == _T("hc"))  tidyOptSetBool(tdoc, TidyHideComments, yes);
	else if (strParam == _T("he"))  tidyOptSetBool(tdoc, TidyHideEndTags, yes);
	else if (strParam == _T("ic"))  tidyOptSetBool(tdoc, TidyIndentCdata, yes);
	else if (strParam == _T("ix"))  tidyOptSetBool(tdoc, TidyXmlTags, yes);
	else if (strParam == _T("jc"))  tidyOptSetBool(tdoc, TidyJoinClasses, yes);
	else if (strParam == _T("js"))  tidyOptSetBool(tdoc, TidyJoinStyles, yes);
	else if (strParam == _T("la"))  tidyOptSetBool(tdoc, TidyLiteralAttribs, yes);
	else if (strParam == _T("le"))  tidyOptSetBool(tdoc, TidyLogicalEmphasis, yes);
	else if (strParam == _T("ll"))  tidyOptSetBool(tdoc, TidyLowerLiterals, yes);
	else if (strParam == _T("n"))   tidyOptSetBool(tdoc, TidyNCR, yes);
	else if (strParam == _T("ne"))  tidyOptSetBool(tdoc, TidyNumEntities, yes);
	else if (strParam == _T("oh"))  tidyOptSetBool(tdoc, TidyHtmlOut, yes);
	else if (strParam == _T("ox"))  tidyOptSetBool(tdoc, TidyXhtmlOut, yes);
	else if (strParam == _T("oxm")) tidyOptSetBool(tdoc, TidyXmlOut, yes);
	else if (strParam == _T("pe"))  tidyOptSetBool(tdoc, TidyPreserveEntities, yes);
	else if (strParam == _T("qa"))  tidyOptSetBool(tdoc, TidyQuoteAmpersand, yes);
	else if (strParam == _T("qm"))  tidyOptSetBool(tdoc, TidyQuoteMarks, yes);
	else if (strParam == _T("qn"))  tidyOptSetBool(tdoc, TidyQuoteNbsp, yes);
	else if (strParam == _T("rc"))  tidyOptSetBool(tdoc, TidyReplaceColor, yes);
	else if (strParam == _T("ua"))  tidyOptSetBool(tdoc, TidyUpperCaseAttrs, yes);
	else if (strParam == _T("ut"))  tidyOptSetBool(tdoc, TidyUpperCaseTags, yes);
	else if (strParam == _T("wo"))  tidyOptSetBool(tdoc, TidyWord2000, yes);
	else if (strParam == _T("bbb")) tidyOptSetBool(tdoc, TidyBreakBeforeBR, yes);
	else if (strParam == _T("ia"))  tidyOptSetBool(tdoc, TidyIndentAttributes, yes);
	else if (strParam == _T("m"))   tidyOptSetBool(tdoc, TidyShowMarkup, yes);
	else if (strParam == _T("pw"))  tidyOptSetBool(tdoc, TidyPunctWrap, yes);
	else if (strParam == _T("vs"))  tidyOptSetBool(tdoc, TidyVertSpace, yes);
	else if (strParam == _T("wa"))  tidyOptSetBool(tdoc, TidyWrapAsp, yes);
	else if (strParam == _T("wat")) tidyOptSetBool(tdoc, TidyWrapAttVals, yes);
	else if (strParam == _T("wj"))  tidyOptSetBool(tdoc, TidyWrapJste, yes);
	else if (strParam == _T("wp"))  tidyOptSetBool(tdoc, TidyWrapPhp, yes);
	else if (strParam == _T("wsl")) tidyOptSetBool(tdoc, TidyWrapScriptlets, yes);
	else if (strParam == _T("ws"))  tidyOptSetBool(tdoc, TidyWrapSection, yes);
	else if (strParam == _T("ac"))  tidyOptSetBool(tdoc, TidyAsciiChars, yes);
	else if (strParam == _T("sw"))  tidyOptSetBool(tdoc, TidyShowWarnings, yes);
	else if (strParam == _T("fo"))  tidyOptSetBool(tdoc, TidyForceOutput, yes);
	// --- Three-valued options: abs(n - 2) % 3 maps suffix 0, 1, 2 to 2, 1, 0. ---
	// NOTE(review): presumably this converts the caller's ordering to tidy's
	// no/yes/auto values - confirm against the code that builds the token.
	else if (strParam == _T("i"))   tidyOptSetInt(tdoc, TidyIndentContent, abs(nNumValue - 2) % 3);
	else if (strParam == _T("md"))  tidyOptSetInt(tdoc, TidyMergeDivs, abs(nNumValue - 2) % 3);
	else if (strParam == _T("ms"))  tidyOptSetInt(tdoc, TidyMergeSpans, abs(nNumValue - 2) % 3);
	else if (strParam == _T("sbo")) tidyOptSetInt(tdoc, TidyBodyOnly, abs(nNumValue - 2) % 3);
	// --- Enumerated options: the suffix is reduced modulo a fixed count. ---
	else if (strParam == _T("d"))   tidyOptSetInt(tdoc, TidyDoctypeMode, nNumValue % 5);
	else if (strParam == _T("ra"))  tidyOptSetInt(tdoc, TidyDuplicateAttrs, nNumValue % 2);
	else if (strParam == _T("sa"))  tidyOptSetInt(tdoc, TidySortAttributes, nNumValue % 2);
	// --- Character encodings: the suffix is an index passed to GetEncodeByIndex. ---
	else if (strParam == _T("ce"))  tidySetCharEncoding(tdoc, GetEncodeByIndex(nNumValue));
	else if (strParam == _T("ie"))  tidySetInCharEncoding(tdoc, GetEncodeByIndex(nNumValue));
	else if (strParam == _T("oe"))  tidySetOutCharEncoding(tdoc, GetEncodeByIndex(nNumValue));
	// --- Plain integer options: the suffix is used as-is. ---
	else if (strParam == _T("se"))  tidyOptSetInt(tdoc, TidyShowErrors, nNumValue);
	else if (strParam == _T("is"))  tidyOptSetInt(tdoc, TidyIndentSpaces, nNumValue);
	else if (strParam == _T("ts"))  tidyOptSetInt(tdoc, TidyTabSize, nNumValue);
	else if (strParam == _T("w"))   tidyOptSetInt(tdoc, TidyWrapLen, nNumValue);
	// --- Free-text options: value comes from g_GlobalTidy (HTML mode) or is empty. ---
	else if (strParam == _T("at"))  tidyOptSetValue(tdoc, TidyAltText, CT2A(bHtml ? g_GlobalTidy.m_TidyHtml_at : strNothing));
	else if (strParam == _T("cp"))  tidyOptSetValue(tdoc, TidyCSSPrefix, CT2A(bHtml ? g_GlobalTidy.m_TidyHtml_cp : strNothing));
	else if (strParam == _T("nbt")) tidyOptSetValue(tdoc, TidyBlockTags, CT2A(bHtml ? g_GlobalTidy.m_TidyHtml_nbt : strNothing));
	else if (strParam == _T("net")) tidyOptSetValue(tdoc, TidyEmptyTags, CT2A(bHtml ? g_GlobalTidy.m_TidyHtml_net : strNothing));
	else if (strParam == _T("nit")) tidyOptSetValue(tdoc, TidyInlineTags, CT2A(bHtml ? g_GlobalTidy.m_TidyHtml_nit : strNothing));
	else if (strParam == _T("npt")) tidyOptSetValue(tdoc, TidyPreTags, CT2A(bHtml ? g_GlobalTidy.m_TidyHtml_npt : strNothing));
}
Ejemplo n.º 15
0
	/*!
	* \fn static int TidyHtml(const char *pcSourcePage, string &sDestPage);
	* \brief  Repairs missing or malformed tags by running the page through
	*         HTML Tidy and serializing the result as XHTML.
	* \param  [in]  pcSourcePage  page text to repair (handed to tidy as UTF-8);
	*                             NULL or an empty string is rejected
	* \param  [out] sDestPage     receives the repaired page; not modified on failure
	* \return result code: 0 when the page was repaired, -1 on failure
	* \date   2011-06-01 
	* \author nanjunxiao
	*/
	int Pretreat::TidyHtml(const char *pcSourcePage, std::string &sDestPage)
	{
		// Reject missing/empty input before any tidy object exists, so the cleanup
		// below never runs on an uninitialized document handle.
		if ( (pcSourcePage == NULL) || (pcSourcePage[0] == '\0') )
		{
			return -1;
		}

		// Size of the first output attempt; larger pages are handled by the retry below.
		const uint uiInitialBufLen = 1024 * 1024 * 5;

		int iReturn = -1;
		TidyBuffer errbuf = {0};
		TidyDoc tdoc = tidyCreate(); // Initialize "document"

		try
		{
			// Each step runs only if the previous one did not fail (negative result).
			int iRet = ( tidyOptSetBool(tdoc, TidyXhtmlOut, yes) ? 0 : -1 ); // Convert to XHTML
			if (iRet >= 0)
			{
				iRet = tidySetErrorBuffer(tdoc, &errbuf); // Capture diagnostics
			}
			if (iRet >= 0)
			{
				iRet = tidySetCharEncoding(tdoc, "utf8"); // Input and output are UTF-8
			}
			if (iRet >= 0)
			{
				iRet = tidyParseString(tdoc, pcSourcePage); // Parse the input
			}
			if (iRet >= 0)
			{
				iRet = tidyCleanAndRepair(tdoc); // Tidy it up!
			}
			if (iRet >= 0)
			{
				iRet = tidyRunDiagnostics(tdoc); // Kvetch
			}
			if (iRet > 1) // If error, force output.
			{
				iRet = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? iRet : -1 );
			}

			if (iRet >= 0)
			{
				// Pretty print. The buffer is one byte larger than the capacity given
				// to tidy, so it stays NUL-terminated even when the output fills it.
				uint uiBufLen = uiInitialBufLen;
				std::string sOutput(uiBufLen + 1, '\0');
				iRet = tidySaveString(tdoc, &sOutput[0], &uiBufLen);
				if (iRet == -ENOMEM)
				{
					// Buffer too small: tidy has stored the required size in uiBufLen,
					// so grow to that size and serialize once more.
					sOutput.assign(uiBufLen + 1, '\0');
					iRet = tidySaveString(tdoc, &sOutput[0], &uiBufLen);
				}

				if (iRet >= 0)
				{
					sDestPage = sOutput.c_str();
					iReturn = 0;
				}
			}
		}
		catch(...)
		{
			// e.g. allocation failure while sizing the output buffer
			iReturn = -1;
		}

		tidyBufFree(&errbuf);
		tidyRelease(tdoc);
		return iReturn;
	}