Пример #1
0
/*
 * Download the URL given as the only command-line argument with libcurl,
 * parse the response with HTML Tidy, walk the resulting tree with
 * dumpNode() and print Tidy's diagnostics to stderr.
 *
 * Returns 0 after printing a usage line when the argument count is wrong,
 * 1 when no curl handle could be created, and otherwise the last status
 * produced by curl_easy_perform() or by the Tidy calls.
 */
int main(int argc, char **argv )
{
  CURL *curl;
  char curl_errbuf[CURL_ERROR_SIZE];
  TidyDoc tdoc;
  TidyBuffer docbuf = {0};
  TidyBuffer tidy_errbuf = {0};
  int err;

  if ( argc != 2 ) {
    printf( "usage: %s <url>\n", argv[0] );
    return(0);
  }

  curl = curl_easy_init();
  if ( !curl ) {
    fprintf(stderr, "curl_easy_init() failed\n");
    return(1);
  }

  /* Start with an empty message so the buffer is a valid string even if
     libcurl reports an error without writing to it. */
  curl_errbuf[0] = '\0';

  curl_easy_setopt(curl, CURLOPT_URL, argv[1]);
  curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf);
  curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
  curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);

  tdoc = tidyCreate();
  tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */
  tidyOptSetInt(tdoc, TidyWrapLen, 4096);
  /* Initialise both buffers explicitly so that they can be released
     unconditionally in the clean-up section below. */
  tidyBufInit(&tidy_errbuf);
  tidySetErrorBuffer( tdoc, &tidy_errbuf );
  tidyBufInit(&docbuf);

  /* write_cb receives &docbuf as its user-data pointer */
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &docbuf);
  err = curl_easy_perform(curl);
  if ( !err ) {
    err = tidyParseBuffer(tdoc, &docbuf); /* parse the input */
    if ( err >= 0 ) {
      err = tidyCleanAndRepair(tdoc); /* fix any problems */
      if ( err >= 0 ) {
        err = tidyRunDiagnostics(tdoc); /* load tidy error buffer */
        if ( err >= 0 ) {
          dumpNode( tdoc, tidyGetRoot(tdoc), 0 ); /* walk the tree */
          /* bp stays NULL when nothing was written to the buffer */
          if ( tidy_errbuf.bp )
            fprintf(stderr, "%s\n", tidy_errbuf.bp); /* show errors */
        }
      }
    }
  }
  else
    fprintf(stderr, "%s\n", curl_errbuf);

  /* clean-up */
  curl_easy_cleanup(curl);
  tidyBufFree(&docbuf);
  tidyBufFree(&tidy_errbuf);
  tidyRelease(tdoc);
  return(err);
}
Пример #2
0
/*
 * Output handler that passes the buffered output through Tidy.
 *
 * The handler only acts when TG(clean_output) is set and the operation
 * carries both the PHP_OUTPUT_HANDLER_START and PHP_OUTPUT_HANDLER_FINAL
 * flags. On success the repaired markup is stored in output_context->out
 * and SUCCESS is returned; in every other case FAILURE is returned and
 * output_context->out is left untouched.
 */
static int php_tidy_output_handler(void **nothing, php_output_context *output_context)
{
	int status = FAILURE;
	TidyDoc doc;
	TidyBuffer inbuf, outbuf, errbuf;

	if (TG(clean_output) && (output_context->op & PHP_OUTPUT_HANDLER_START) && (output_context->op & PHP_OUTPUT_HANDLER_FINAL)) {
		/* Reject oversized input before anything is allocated, so this
		 * early return has nothing to release (the length is narrowed to
		 * uint for tidyBufAttach below). */
		if (ZEND_SIZE_T_UINT_OVFL(output_context->in.used)) {
			php_error_docref(NULL, E_WARNING, "Input string is too long");
			return status;
		}

		doc = tidyCreate();
		tidyBufInit(&errbuf);

		if (0 == tidySetErrorBuffer(doc, &errbuf)) {
			tidyOptSetBool(doc, TidyForceOutput, yes);
			tidyOptSetBool(doc, TidyMark, no);

			TIDY_SET_DEFAULT_CONFIG(doc);

			/* inbuf is attached to the caller's data rather than given a
			 * copy, so it is not freed here. */
			tidyBufInit(&inbuf);
			tidyBufAttach(&inbuf, (byte *) output_context->in.data, (uint)output_context->in.used);

			if (0 <= tidyParseBuffer(doc, &inbuf) && 0 <= tidyCleanAndRepair(doc)) {
				tidyBufInit(&outbuf);
				tidySaveBuffer(doc, &outbuf);
				FIX_BUFFER(&outbuf);
				/* outbuf's storage is handed to the output context and is
				 * therefore not freed in this function.
				 * NOTE(review): out.free = 1 presumably asks the output
				 * layer to release it; the reported length drops the last
				 * byte, presumably a terminator added by FIX_BUFFER. */
				output_context->out.data = (char *) outbuf.bp;
				output_context->out.used = outbuf.size ? outbuf.size-1 : 0;
				output_context->out.free = 1;
				status = SUCCESS;
			}
		}

		tidyRelease(doc);
		tidyBufFree(&errbuf);
	}

	return status;
}
Пример #3
0
/*
 * Parse `len` bytes starting at `string` into the Tidy document held by
 * `obj`.
 *
 * When `enc` is non-NULL it is applied as the document's character
 * encoding first; a rejected encoding raises a warning and aborts before
 * the document is marked as initialized. A parse failure raises a warning
 * carrying the contents of the document's error buffer.
 *
 * Returns SUCCESS once the input is parsed and the object's properties
 * have been refreshed, FAILURE otherwise.
 */
static int php_tidy_parse_string(PHPTidyObj *obj, char *string, uint len, char *enc)
{
	TidyBuffer input;

	if (enc != NULL && tidySetCharEncoding(obj->ptdoc->doc, enc) < 0) {
		php_error_docref(NULL, E_WARNING, "Could not set encoding '%s'", enc);
		return FAILURE;
	}

	/* The flag is raised before parsing and stays set even if parsing fails. */
	obj->ptdoc->initialized = 1;

	/* Attach the caller's memory to the buffer; no copy is made here. */
	tidyBufInit(&input);
	tidyBufAttach(&input, (byte *) string, len);

	if (tidyParseBuffer(obj->ptdoc->doc, &input) >= 0) {
		tidy_doc_update_properties(obj);
		return SUCCESS;
	}

	php_error_docref(NULL, E_WARNING, "%s", obj->ptdoc->errbuf->bp);
	return FAILURE;
}
Пример #4
0
/*
 * Shared implementation of a one-shot "parse, clean and return markup"
 * operation.
 *
 * When `is_file` is true the first argument is a path whose contents are
 * loaded with php_tidy_file_to_mem(); otherwise the first argument is the
 * markup itself. The optional arguments are a configuration value, an
 * encoding name and the use_include_path flag.
 *
 * The return value is set to the repaired markup on success and to FALSE
 * when arguments cannot be parsed, the file cannot be loaded, the input is
 * too long, or parsing/repair fails.
 */
static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, zend_bool is_file)
{
	char *enc = NULL;
	size_t enc_len = 0;
	zend_bool use_include_path = 0;
	TidyDoc doc;
	TidyBuffer *errbuf;
	zend_string *data, *arg1;
	zval *config = NULL;

	if (is_file) {
		if (zend_parse_parameters(ZEND_NUM_ARGS(), "P|zsb", &arg1, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
			RETURN_FALSE;
		}
		if (!(data = php_tidy_file_to_mem(ZSTR_VAL(arg1), use_include_path))) {
			RETURN_FALSE;
		}
	} else {
		if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|zsb", &arg1, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
			RETURN_FALSE;
		}
		data = arg1;
	}

	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(data))) {
		php_error_docref(NULL, E_WARNING, "Input string is too long");
		/* In file mode `data` was obtained from php_tidy_file_to_mem() and
		 * is released by this function on the normal path, so it has to be
		 * released on this early exit as well. */
		if (is_file) {
			zend_string_release(data);
		}
		RETURN_FALSE;
	}

	doc = tidyCreate();
	errbuf = emalloc(sizeof(TidyBuffer));
	tidyBufInit(errbuf);

	if (tidySetErrorBuffer(doc, errbuf) != 0) {
		tidyBufFree(errbuf);
		efree(errbuf);
		tidyRelease(doc);
		/* NOTE(review): the code below relies on an E_ERROR not returning
		 * to this function; otherwise doc and errbuf would be used after
		 * being released. Confirm. */
		php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
	}

	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);

	TIDY_SET_DEFAULT_CONFIG(doc);

	if (config) {
		TIDY_APPLY_CONFIG_ZVAL(doc, config);
	}

	if(enc_len) {
		if (tidySetCharEncoding(doc, enc) < 0) {
			php_error_docref(NULL, E_WARNING, "Could not set encoding '%s'", enc);
			/* NOTE(review): processing continues after this failure, so
			 * the FALSE set here can still be replaced by the repaired
			 * markup below. */
			RETVAL_FALSE;
		}
	}

	if (data) {
		TidyBuffer buf;

		/* buf is attached to the bytes of `data`; it is not freed here. */
		tidyBufInit(&buf);
		tidyBufAttach(&buf, (byte *) ZSTR_VAL(data), (uint)ZSTR_LEN(data));

		if (tidyParseBuffer(doc, &buf) < 0) {
			php_error_docref(NULL, E_WARNING, "%s", errbuf->bp);
			RETVAL_FALSE;
		} else {
			if (tidyCleanAndRepair(doc) >= 0) {
				TidyBuffer output;
				tidyBufInit(&output);

				tidySaveBuffer (doc, &output);
				FIX_BUFFER(&output);
				/* The returned length excludes the buffer's last byte. */
				RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
				tidyBufFree(&output);
			} else {
				RETVAL_FALSE;
			}
		}
	}

	/* Only the file-mode string is released here; in string mode `data`
	 * is the argument itself. */
	if (is_file) {
		zend_string_release(data);
	}

	tidyBufFree(errbuf);
	efree(errbuf);
	tidyRelease(doc);
}
Пример #5
0
bool TidyReader::openFile (const char * szFilename)
{
    UT_DEBUGMSG(("using libtidy to parse HTML...\n"));

    m_tidy = tidyCreate ();
    if (m_tidy == 0) return false;

    if (tidyOptSetBool (m_tidy, TidyXhtmlOut, yes) == 0)
    {
        UT_DEBUGMSG(("tidyOptSetBool failed!\n"));
        closeFile ();
        return false;
    }
#ifndef DEBUG
    tidySetErrorBuffer (m_tidy, &m_errbuf);
#endif

    int parse_status;
    if (m_buffer && m_length)
    {
        UT_DEBUGMSG(("parse HTML in buffer...\n"));

        UT_Byte * buffer = const_cast<UT_Byte *>(m_buffer); // grr.

        TidyBuffer inbuf;

        tidyBufInit (&inbuf);
        tidyBufAttach (&inbuf, buffer, static_cast<unsigned int>(m_length));

        parse_status = tidyParseBuffer (m_tidy, &inbuf);

        tidyBufDetach (&inbuf);
    }
    else
    {
        UT_DEBUGMSG(("parse HTML in file: %s\n",szFilename));
        parse_status = tidyParseFile (m_tidy, szFilename);
    }
    if (parse_status < 0)
    {
        UT_DEBUGMSG(("tidyParseBuffer/File failed!\n"));
        closeFile ();
        return false;
    }

    parse_status = tidyCleanAndRepair (m_tidy);
    if (parse_status < 0)
    {
        UT_DEBUGMSG(("tidyCleanAndRepair failed!\n"));
        closeFile ();
        return false;
    }

    parse_status = tidyRunDiagnostics (m_tidy);
    if (parse_status < 0)
    {
        UT_DEBUGMSG(("tidyRunDiagnostics failed!\n"));
        closeFile ();
        return false;
    }

    if (parse_status > 1)
    {
        parse_status = (tidyOptSetBool (m_tidy, TidyForceOutput, yes) ? parse_status : -1);
    }
    if (parse_status < 0)
    {
        UT_DEBUGMSG(("tidyOptSetBool failed!\n"));
        closeFile ();
        return false;
    }

    parse_status = tidySaveBuffer (m_tidy, &m_outbuf);
    if (parse_status < 0)
    {
        UT_DEBUGMSG(("tidySaveBuffer failed!\n"));
        closeFile ();
        return false;
    }
    UT_DEBUGMSG(("tidy succeeded!\n"));
#ifdef DEBUG
    fputs ("================================================================\n", stderr);
    fputs ((const char *) m_outbuf.bp, stderr);
    fputs ("================================================================\n", stderr);
#endif
    m_outbuf.next = 0;

    return true;
}
Пример #6
0
/*
 * Fetch the weatherlink.com summary page for a user, parse it with HTML
 * Tidy, let dumpNode() walk the tree with a WeatherData record, and print
 * the record's fields.
 *
 * The user name is taken from the single command-line argument, or a
 * placeholder is used when the argument count differs.
 *
 * Returns 1 if the URL does not fit its buffer or no curl handle can be
 * created; otherwise the last status produced by curl_easy_perform() or by
 * the Tidy calls.
 */
int main(int argc, char **argv )
{
   CURL *curl;
   char curl_errbuf[CURL_ERROR_SIZE];
   char url[URL_BUF_SIZE];
   const char *username;
   TidyDoc tdoc;
   TidyBuffer docbuf = {0};
   TidyBuffer tidy_errbuf = {0};
   /* Zero-initialised so the fields printed below hold defined values even
      when the download or the parse fails. */
   WeatherData data = {0};
   int err;
   int url_len;

   if ( argc == 2 )
   {
      username = argv[1];
   }
   else
   {
      username = "******";
   }

   url_len = snprintf(url, sizeof url, "http://www.weatherlink.com/user/%s/index.php?view=summary&headers=0&type=2", username);
   if ( url_len < 0 || (size_t)url_len >= sizeof url )
   {
      /* Do not request a silently truncated URL. */
      fprintf(stderr, "URL is too long\n");
      return(1);
   }

   curl = curl_easy_init();
   if ( !curl )
   {
      fprintf(stderr, "curl_easy_init() failed\n");
      return(1);
   }

   /* Start with an empty message so the buffer is a valid string even if
      libcurl reports an error without writing to it. */
   curl_errbuf[0] = '\0';

   curl_easy_setopt(curl, CURLOPT_URL, url);
   curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf);
   curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L);
   curl_easy_setopt(curl, CURLOPT_VERBOSE, 0L);
   curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);

   tdoc = tidyCreate();
   tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */
   tidyOptSetInt(tdoc, TidyWrapLen, 4096);
   /* Initialise the error buffer explicitly so it can be released
      unconditionally during clean-up. */
   tidyBufInit(&tidy_errbuf);
   tidySetErrorBuffer( tdoc, &tidy_errbuf );
   tidyBufInit(&docbuf);

   /* write_cb receives &docbuf as its user-data pointer */
   curl_easy_setopt(curl, CURLOPT_WRITEDATA, &docbuf);
   err = curl_easy_perform(curl);
   if ( !err )
   {
      err = tidyParseBuffer(tdoc, &docbuf); /* parse the input */
      if ( err >= 0 )
      {
         err = tidyCleanAndRepair(tdoc); /* fix any problems */
         if ( err >= 0 )
         {
            /* walk the tree; presumably dumpNode fills in `data` */
            dumpNode( tdoc, tidyGetRoot(tdoc), 0, &data );
         }
      }
   }
   else
   {
      fprintf(stderr, "%s\n", curl_errbuf);
   }

   printf("Outside temp: %f\n", data.outsideTemp );
   printf("Outside humidity: %d\n", data.outsideHumidity );
   printf("Dew Point: %f\n", data.dewPoint );
   printf("Barometer: %f\n", data.barometer );
   printf("Wind speed: %f\n", data.instantWindSpeed );
   printf("Wind direction: %d\n", data.instantWindDirection );
   printf("Average Wind: %f\n", data.avgWindSpeed_2min );
   printf("Wind Gust: %f\n", data.windGust_10min);
   printf("rainRate: %f\n", data.rainRate );
   printf("dailyRain: %f\n", data.dailyRain );
   printf("lastHourRain: %f\n", data.lastHourRain );

   /* clean-up */
   curl_easy_cleanup(curl);
   tidyBufFree(&docbuf);
   tidyBufFree(&tidy_errbuf);
   tidyRelease(tdoc);
   return(err);
}