Пример #1
0
/* the entry point */
void html2nodes(const char *htmltext, bool startpage)
{
	char *htmlfix = 0;

	tdoc = tidyCreate();
	if (!startpage)
		tidyOptSetInt(tdoc, TidyBodyOnly, yes);
	tidySetReportFilter(tdoc, tidyErrorHandler);
//    tidySetReportFilter(tdoc, tidyReportFilter);

	tidySetCharEncoding(tdoc, (cons_utf8 ? "utf8" : "latin1"));

	htmlfix = tidyPreprocess(htmltext);
	if (htmlfix) {
		tidyParseString(tdoc, htmlfix);
		nzFree(htmlfix);
	} else
		tidyParseString(tdoc, htmltext);

	tidyCleanAndRepair(tdoc);

	if (debugLevel >= 5) {
		traverse_tidycall = printNode;
		traverseTidy();
	}

/* convert tidy nodes into edbrowse nodes */
	traverse_tidycall = convertNode;
	traverseTidy();

	tidyRelease(tdoc);
}				/* html2nodes */
Пример #2
0
void parse_urls(const char *filename, const url_list_t *elem)
{
	TidyDoc tdoc;
	int err;
	FILE *outfile = NULL;

	tdoc = tidyCreate();
	tidyOptSetBool(tdoc, TidyForceOutput, yes);
	tidyOptSetBool(tdoc, TidyMark, no);
	tidyOptSetBool(tdoc, TidyHideEndTags, yes);
	tidyOptSetBool(tdoc, TidyDropEmptyParas, no);
	tidyOptSetBool(tdoc, TidyJoinStyles, no);
	tidyOptSetBool(tdoc, TidyPreserveEntities, yes);
	tidyOptSetInt(tdoc, TidyMergeDivs, no);
	tidyOptSetInt(tdoc, TidyMergeSpans, no);
	tidyOptSetInt(tdoc, TidyWrapLen, 4096);
	tidyOptSetValue(tdoc, TidyCharEncoding, "utf8");
	tidySetReportFilter(tdoc, filter_cb);

	err = tidyParseFile(tdoc, filename);

	if (err >= 0) 
		err = tidyCleanAndRepair(tdoc);

	if (err >= 0) {
		outfile = option_values.save_relative_links && !option_values.disable_save_tree
			? fopen(filename, "w") : NULL;

		parse_html(tdoc, tidyGetRoot(tdoc), elem, 1, outfile);

		if (outfile)
			fclose(outfile);
	}

	tidyRelease(tdoc);
}