/* the entry point */ void html2nodes(const char *htmltext, bool startpage) { char *htmlfix = 0; tdoc = tidyCreate(); if (!startpage) tidyOptSetInt(tdoc, TidyBodyOnly, yes); tidySetReportFilter(tdoc, tidyErrorHandler); // tidySetReportFilter(tdoc, tidyReportFilter); tidySetCharEncoding(tdoc, (cons_utf8 ? "utf8" : "latin1")); htmlfix = tidyPreprocess(htmltext); if (htmlfix) { tidyParseString(tdoc, htmlfix); nzFree(htmlfix); } else tidyParseString(tdoc, htmltext); tidyCleanAndRepair(tdoc); if (debugLevel >= 5) { traverse_tidycall = printNode; traverseTidy(); } /* convert tidy nodes into edbrowse nodes */ traverse_tidycall = convertNode; traverseTidy(); tidyRelease(tdoc); } /* html2nodes */
void parse_urls(const char *filename, const url_list_t *elem) { TidyDoc tdoc; int err; FILE *outfile = NULL; tdoc = tidyCreate(); tidyOptSetBool(tdoc, TidyForceOutput, yes); tidyOptSetBool(tdoc, TidyMark, no); tidyOptSetBool(tdoc, TidyHideEndTags, yes); tidyOptSetBool(tdoc, TidyDropEmptyParas, no); tidyOptSetBool(tdoc, TidyJoinStyles, no); tidyOptSetBool(tdoc, TidyPreserveEntities, yes); tidyOptSetInt(tdoc, TidyMergeDivs, no); tidyOptSetInt(tdoc, TidyMergeSpans, no); tidyOptSetInt(tdoc, TidyWrapLen, 4096); tidyOptSetValue(tdoc, TidyCharEncoding, "utf8"); tidySetReportFilter(tdoc, filter_cb); err = tidyParseFile(tdoc, filename); if (err >= 0) err = tidyCleanAndRepair(tdoc); if (err >= 0) { outfile = option_values.save_relative_links && !option_values.disable_save_tree ? fopen(filename, "w") : NULL; parse_html(tdoc, tidyGetRoot(tdoc), elem, 1, outfile); if (outfile) fclose(outfile); } tidyRelease(tdoc); }