/*
 * Return a freshly allocated, NUL-terminated copy of the value of tnode.
 *
 * tnode: node whose value is requested
 * tdoc:  document the node belongs to
 *
 * The storage comes from xmalloc() and is handed to the caller, who is
 * responsible for releasing it.  The return value of tidyNodeGetValue()
 * is not inspected: if it wrote nothing, the buffer is still in the empty
 * state left by tidyBufInit() and the result is an empty string.
 */
char *copyTidyNodeValue(TidyNode tnode, TidyDoc tdoc)
{
	TidyBuffer buf;
	char *str;

	tidyBufInit(&buf);
	tidyNodeGetValue(tdoc, tnode, &buf);

	str = (char *)xmalloc(buf.size + 1);

	/*
	 * buf.bp is still NULL when nothing was written into the buffer;
	 * memcpy() must not be given a null pointer, even for a zero-length
	 * copy, so skip it in that case.
	 */
	if (buf.bp != NULL && buf.size > 0)
		memcpy(str, buf.bp, buf.size);
	str[buf.size] = '\0';

	tidyBufFree(&buf);
	return str;
}
void nuiHTMLNode::SetFromNode(const void* _tdoc, const void* _tnod, nglTextEncoding encoding, bool ComputeStyle) { Clear(); TidyDoc tdoc = (TidyDoc)_tdoc; TidyNode tnod = (TidyNode)_tnod; mName = nglString(tidyNodeGetName(tnod), encoding); mType = (NodeType)tidyNodeGetType(tnod); mTagType = (TagType)tidyNodeGetId(tnod); nglString text; TidyBuffer buf; tidyBufInit(&buf); if (tidyNodeGetValue(tdoc, tnod, &buf)) { mText.Import((const char*)buf.bp, (int32)buf.size, encoding); //NGL_OUT(_T("<%s> %s\n"), mName.GetChars(), mText.GetChars()); } tidyBufFree(&buf); // Fill the attributes: TidyAttr tattr; for (tattr = tidyAttrFirst(tnod); tattr; tattr = tidyAttrNext(tattr)) { nuiHTMLAttrib* pAttrib = new nuiHTMLAttrib(tattr, encoding); mAttributes.push_back(pAttrib); } if (ComputeStyle) { mpStyle = new nuiCSSStyle(this); nuiHTMLAttrib* pStyle = GetAttribute(nuiHTMLAttrib::eAttrib_STYLE); if (pStyle) { AddStyleSheet(GetSourceURL(), pStyle->GetValue(), true); } } }
/*
 * Create an htmlTag for one tidy node and, for the opening visit, fill in
 * its text value, attributes and innerHTML.
 *
 * node:    the tidy node being visited
 * level:   not used by this function
 * opentag: true on the opening visit; false on the closing visit, in which
 *          case the new tag is only marked with slash = true
 *
 * Only text nodes and start/end/start-end element nodes produce a tag;
 * every other node type is ignored.  The tidy document is taken from
 * `tdoc`, which is not a parameter here (presumably file scope).
 */
static void convertNode(TidyNode node, int level, bool opentag)
{
	ctmbstr name;
	TidyAttr tattr;
	struct htmlTag *t;
	int nattr;		/* number of attributes */
	int i;

	switch (tidyNodeGetType(node)) {
	case TidyNode_Text:
		name = "Text";
		break;
	case TidyNode_Start:
	case TidyNode_End:
	case TidyNode_StartEnd:
		name = tidyNodeGetName(node);
		break;
	default:
		return;
	}

	t = newTag((char *)name);
	if (!t)
		return;

	if (!opentag) {
		t->slash = true;
		return;
	}

	/* if a js script, remember the line number for error messages */
	if (t->action == TAGACT_SCRIPT)
		t->js_ln = tidyNodeLine(node);

	/* this is the open tag, set the attributes */

	/* special case for text tag */
	if (t->action == TAGACT_TEXT) {
		TidyBuffer tnv;	/* text-node value */

		/*
		 * tidyBufInit() leaves the buffer in a state that
		 * tidyBufFree() can always handle, so the buffer is released
		 * on every path, including "allocated but empty".
		 */
		tidyBufInit(&tnv);
		tidyNodeGetValue(tdoc, node, &tnv);
		if (tnv.size)
			t->textval = cloneString((char *)tnv.bp);
		tidyBufFree(&tnv);
	}

	/* first pass: count the attributes so the arrays can be sized */
	nattr = 0;
	for (tattr = tidyAttrFirst(node); tattr != NULL;
	     tattr = tidyAttrNext(tattr))
		++nattr;

	/* one extra slot in each array for the terminating null entry */
	t->attributes = allocMem(sizeof(char *) * (nattr + 1));
	t->atvals = allocMem(sizeof(char *) * (nattr + 1));

	/* second pass: copy names and values */
	i = 0;
	for (tattr = tidyAttrFirst(node); tattr != NULL;
	     tattr = tidyAttrNext(tattr)) {
		t->attributes[i] = cloneString(tidyAttrName(tattr));
		t->atvals[i] = cloneString(tidyAttrValue(tattr));
		++i;
	}
	t->attributes[i] = 0;
	t->atvals[i] = 0;

	/* innerHTML, only for certain tags */
	if (t->info->bits & TAG_INNERHTML) {
		TidyBuffer tnv;	/* serialized node text */

		tidyBufInit(&tnv);
		t->innerHTML = emptyString;
		tidyNodeGetText(tdoc, node, &tnv);
		if (tnv.size) {
			/* But it's not the original html, it has been sanitized.
			 * Put a cap on size, else memory consumed could, theoretically,
			 * grow as the size of the document squared. */
			if (tnv.size <= 4096)
				t->innerHTML = cloneString((char *)tnv.bp);
			/*
			 * NOTE(review): tagStrip() also runs when the cap was
			 * exceeded and innerHTML is still emptyString; kept as
			 * in the original - confirm that this is intended.
			 */
			tagStrip(t->innerHTML);
		}
		tidyBufFree(&tnv);
	}
}				/* convertNode */
/* this is strictly for debugging, level >= 5 */
/*
 * Print one tidy node to stdout.
 *
 * node:    the tidy node being visited
 * level:   value printed next to the node name
 * opentag: true on the opening visit; on the closing visit only "}" is
 *          printed
 *
 * On the opening visit the node name is printed, followed by the text value
 * for nodes named "Text" and one "@name = value" line per attribute.
 * Missing strings are printed as empty rather than being passed to printf()
 * as null pointers.  The tidy document is taken from `tdoc`, which is not a
 * parameter here (presumably file scope).
 */
static void printNode(TidyNode node, int level, bool opentag)
{
	ctmbstr name;
	TidyAttr tattr;

	if (!opentag) {
		puts("}");
		return;
	}

	switch (tidyNodeGetType(node)) {
	case TidyNode_Root:
		name = "Root";
		break;
	case TidyNode_DocType:
		name = "DOCTYPE";
		break;
	case TidyNode_Comment:
		name = "Comment";
		break;
	case TidyNode_ProcIns:
		name = "Processing Instruction";
		break;
	case TidyNode_Text:
		name = "Text";
		break;
	case TidyNode_CDATA:
		name = "CDATA";
		break;
	case TidyNode_Section:
		name = "XML Section";
		break;
	case TidyNode_Asp:
		name = "ASP";
		break;
	case TidyNode_Jste:
		name = "JSTE";
		break;
	case TidyNode_Php:
		name = "PHP";
		break;
	case TidyNode_XmlDecl:
		name = "XML Declaration";
		break;
	case TidyNode_Start:
	case TidyNode_End:
	case TidyNode_StartEnd:
	default:
		name = tidyNodeGetName(node);
		break;
	}

	assert(name != NULL);
	printf("Node(%d): %s {\n", level, ((char *)name));

	/* the ifs could be combined with && */
	if (stringEqual(((char *)name), "Text")) {
		TidyBuffer tnv;	/* text-node value */

		/*
		 * tidyBufInit() pairs with an unconditional tidyBufFree(),
		 * so the buffer is released even when it ends up empty.
		 */
		tidyBufInit(&tnv);
		tidyNodeGetValue(tdoc, node, &tnv);
		/* bp stays NULL when no value was written */
		printf("Text: %s\n", tnv.bp ? (char *)tnv.bp : "");
		tidyBufFree(&tnv);
	}

	/* Walk the attribute list of the node */
	for (tattr = tidyAttrFirst(node); tattr != NULL;
	     tattr = tidyAttrNext(tattr)) {
		ctmbstr aname = tidyAttrName(tattr);
		ctmbstr avalue = tidyAttrValue(tattr);

		/* an attribute may have no value; never hand NULL to %s */
		printf("@%s = %s\n", aname ? aname : "", avalue ? avalue : "");
	}
}				/* printNode */
/*
 * Recursively walk the children of tnod.
 *
 * tdoc:    tidy document the nodes belong to
 * tnod:    parent node whose children are visited
 * elem:    URL list entry being processed; its level is compared with
 *          option_values.depth to decide which kinds of links are followed
 * indent:  width used when writing tag openers; increased by one for each
 *          level of recursion
 * outfile: when non-NULL, a textual rendering of the tree is written here
 *
 * For element nodes the link-carrying attribute (HREF or SRC, depending on
 * the tag) is looked up and a non-empty value is passed to
 * add_new_url_and_check().  When writing to outfile, relative_url - if set -
 * is printed in place of that attribute's own value (presumably it is filled
 * in by add_new_url_and_check(); confirm against that function).
 */
static void parse_html(TidyDoc tdoc, TidyNode tnod, const url_list_t *elem, int indent, FILE *outfile)
{
    TidyNode child;
    TidyAttr attr;
    ctmbstr name;
    char *relative_url = NULL;

    /* Depth gates; a depth of zero disables the limit. */
    const int get_html_link = (!option_values.depth || elem->level < option_values.depth);
    const int get_int_html_link = (!option_values.depth || elem->level < option_values.depth+1);
    const int get_ext_depends = (get_int_html_link && !option_values.no_html_dependencies);

    for (child = tidyGetChild(tnod); child; child = tidyGetNext(child)) {
        const TidyNodeType node_type = tidyNodeGetType(child);

        switch (node_type) {
        case TidyNode_Start:
        case TidyNode_StartEnd: {
            const TidyTagId node_id = tidyNodeGetId(child);
            /* Stays UNKNOWN unless this tag carries a link to follow. */
            TidyAttrId attr_id = TidyAttr_UNKNOWN;

            if (get_html_link
                && (node_id == TidyTag_A || node_id == TidyTag_AREA || node_id == TidyTag_MAP)) {
                attr_id = TidyAttr_HREF;
            } else if (get_int_html_link
                       && (node_id == TidyTag_FRAME || node_id == TidyTag_IFRAME)) {
                attr_id = TidyAttr_SRC;
            } else if (get_ext_depends) {
                if (node_id == TidyTag_LINK)
                    attr_id = TidyAttr_HREF;
                else if (node_id == TidyTag_IMG || node_id == TidyTag_SCRIPT)
                    attr_id = TidyAttr_SRC;
            }

            if (attr_id != TidyAttr_UNKNOWN
                && (attr = tidyAttrGetById(child, attr_id)) != NULL) {
                char *url = (char *) tidyAttrValue(attr);

                string_free(relative_url);
                if (url && *url)
                    add_new_url_and_check(elem, url, outfile ? &relative_url : NULL);
            }

            /* The name is only needed (and only fetched) when writing. */
            name = outfile ? tidyNodeGetName(child) : NULL;
            if (name == NULL) {
                string_free(relative_url);
                parse_html(tdoc, child, elem, indent + 1, outfile);
                break;
            }

            fprintf(outfile, "%*.*s%s", indent, indent, "<", name);
            for (attr = tidyAttrFirst(child); attr; attr = tidyAttrNext(attr)) {
                fprintf(outfile, " %s", tidyAttrName(attr));
                if (relative_url && (tidyAttrGetId(attr) == attr_id)) {
                    fprintf(outfile, "=\"%s\"", relative_url);
                } else {
                    /* A missing value is written as an empty string. */
                    ctmbstr value = tidyAttrValue(attr);

                    fprintf(outfile, "=\"%s\"", value ? value : "");
                }
            }
            string_free(relative_url);

            if (node_type == TidyNode_StartEnd) {
                fprintf(outfile, "/>\n");
            } else {
                fprintf(outfile, ">\n");
                parse_html(tdoc, child, elem, indent + 1, outfile);
                fprintf(outfile, "%*.*s%s>\n", indent + 1, indent + 1, "</", name);
            }
            break;
        }

        case TidyNode_End:
            if (outfile && (name = tidyNodeGetName(child)) != NULL)
                fprintf(outfile, "%*.*s/%s>\n", indent, indent, "<", name);
            break;

        case TidyNode_Text: {
            TidyBuffer buf;
            TidyTagId parent_node_id;

            if (!outfile)
                break;

            /* Text inside SCRIPT/STYLE is fetched with tidyNodeGetValue(),
             * all other text with tidyNodeGetText(). */
            parent_node_id = tidyNodeGetId(tnod);
            tidyBufInit(&buf);
            if (parent_node_id == TidyTag_SCRIPT || parent_node_id == TidyTag_STYLE)
                tidyNodeGetValue(tdoc, child, &buf);
            else
                tidyNodeGetText(tdoc, child, &buf);
            if (buf.bp)
                fprintf(outfile, "%s", (char *)buf.bp);
            tidyBufFree(&buf);
            break;
        }

        case TidyNode_Comment: {
            TidyBuffer buf;

            if (!outfile)
                break;

            tidyBufInit(&buf);
            tidyNodeGetValue(tdoc, child, &buf);
            if (buf.bp)
                fprintf(outfile, "<!--%s-->\n", (char *)buf.bp);
            tidyBufFree(&buf);
            break;
        }

        case TidyNode_CDATA: {
            TidyBuffer buf;

            if (!outfile)
                break;

            tidyBufInit(&buf);
            tidyNodeGetValue(tdoc, child, &buf);
            if (buf.bp)
                fprintf(outfile, "<![CDATA[%s]]>\n", (char *)buf.bp);
            tidyBufFree(&buf);
            break;
        }

        case TidyNode_DocType: {
            /* Set once an attribute named PUBLIC has been written; after
             * that only attribute values are printed. */
            int pub = 0;

            if (!outfile)
                break;

            fprintf(outfile, "<!DOCTYPE %s", tidyNodeGetName(child));
            for (attr = tidyAttrFirst(child); attr; attr = tidyAttrNext(attr)) {
                if (!pub) {
                    fprintf(outfile, " %s", tidyAttrName(attr));
                    if (!string_casecmp(tidyAttrName(attr), "PUBLIC"))
                        pub = 1;
                }
                if (tidyAttrValue(attr))
                    fprintf(outfile, " \"%s\"", tidyAttrValue(attr));
            }
            fprintf(outfile, ">\n");
            break;
        }

        default: {
            TidyBuffer buf;

            if (!outfile)
                break;

            /* Any other node type: write its value unchanged. */
            tidyBufInit(&buf);
            tidyNodeGetValue(tdoc, child, &buf);
            if (buf.bp)
                fprintf(outfile, "%s", (char *)buf.bp);
            tidyBufFree(&buf);
            break;
        }
        }
    }
}