/*
 * Return a newly allocated, NUL-terminated copy of the value that
 * tidyNodeGetValue() produces for tnode inside tdoc.
 *
 * The result of tidyNodeGetValue() is not inspected; if it leaves the buffer
 * empty the function returns an empty string.
 *
 * The returned memory comes from xmalloc(); releasing it is the caller's job.
 * NOTE(review): xmalloc() is assumed never to return NULL - confirm.
 */
char *copyTidyNodeValue(TidyNode tnode, TidyDoc tdoc)
{
    TidyBuffer buf;
    char *str;

    tidyBufInit(&buf);
    tidyNodeGetValue(tdoc,tnode,&buf);

    str = (char *)xmalloc(buf.size+1);
    /* bp is still NULL when nothing was written, and memcpy() must never be
     * given a NULL source, not even for a zero-length copy. */
    if (buf.bp != NULL && buf.size > 0)
        memcpy(str,buf.bp,buf.size);
    str[buf.size] = '\0';

    tidyBufFree(&buf);
    return str;
}
Beispiel #2
0
// Re-populate this node from a tidy node.
//
// _tdoc / _tnod   opaque handles that are cast back to TidyDoc / TidyNode.
// encoding        text encoding used when importing the name and the value.
// ComputeStyle    when true, a nuiCSSStyle is created for this node and the
//                 content of its STYLE attribute (if any) is added as a
//                 style sheet.
//
// The node is Clear()ed first, so nothing from a previous call is kept.
void nuiHTMLNode::SetFromNode(const void* _tdoc, const void* _tnod, nglTextEncoding encoding, bool ComputeStyle)
{
  Clear();
  
  TidyDoc tdoc = (TidyDoc)_tdoc;
  TidyNode tnod = (TidyNode)_tnod;

  // Identity of the node: name, node type and tag id as reported by tidy.
  mName = nglString(tidyNodeGetName(tnod), encoding);
  mType = (NodeType)tidyNodeGetType(tnod);
  mTagType = (TagType)tidyNodeGetId(tnod);
  
  // Node value: only imported when tidy reports that the node has one.
  TidyBuffer buf;
  tidyBufInit(&buf);
  if (tidyNodeGetValue(tdoc, tnod, &buf))
  {
    mText.Import((const char*)buf.bp, (int32)buf.size, encoding);
  }
  tidyBufFree(&buf);
  
  // Fill the attributes: one nuiHTMLAttrib per tidy attribute, in tidy's order.
  TidyAttr tattr;
  for (tattr = tidyAttrFirst(tnod); tattr; tattr = tidyAttrNext(tattr))
  {
    nuiHTMLAttrib* pAttrib = new nuiHTMLAttrib(tattr, encoding);
    mAttributes.push_back(pAttrib);
  }
  
  if (ComputeStyle)
  {
    // NOTE(review): assumes Clear() has already released any previous mpStyle - confirm.
    mpStyle = new nuiCSSStyle(this);
    nuiHTMLAttrib* pStyle = GetAttribute(nuiHTMLAttrib::eAttrib_STYLE);
    if (pStyle)
    {
      AddStyleSheet(GetSourceURL(), pStyle->GetValue(), true);
    }
  }
}
Beispiel #3
0
/*
 * Create an htmlTag record for one node of the tidy tree.
 *
 * node     the tidy node being visited
 * level    depth of the node; not used by this function
 * opentag  true when the node is being entered, false when it is being left
 *
 * Only text nodes and start / end / start-end element nodes produce a tag;
 * every other node type is ignored.  For a closing visit the new tag is just
 * marked with slash = true.  For an opening visit the tag additionally gets:
 *   - js_ln     (script tags) the source line reported by tidy
 *   - textval   (text tags) a copy of the node value, when it is non-empty
 *   - attributes / atvals   parallel, 0-terminated arrays of copied
 *                           attribute names and values
 *   - innerHTML (tags whose info has TAG_INNERHTML) see below
 *
 * The tidy document is taken from the file-level variable tdoc.
 */
static void convertNode(TidyNode node, int level, bool opentag)
{
	ctmbstr name;
	TidyAttr tattr;
	struct htmlTag *t;
	int nattr;		/* number of attributes */
	int i;

	switch (tidyNodeGetType(node)) {
	case TidyNode_Text:
		name = "Text";
		break;
	case TidyNode_Start:
	case TidyNode_End:
	case TidyNode_StartEnd:
		name = tidyNodeGetName(node);
		break;
	default:
		return;
	}

	t = newTag((char *)name);
	if (!t)
		return;

	if (!opentag) {
		t->slash = true;
		return;
	}

/* if a js script, remember the line number for error messages */
	if (t->action == TAGACT_SCRIPT)
		t->js_ln = tidyNodeLine(node);

/* this is the open tag, set the attributes */
/* special case for text tag */
	if (t->action == TAGACT_TEXT) {
		TidyBuffer tnv;	/* text-node value */
		tidyBufInit(&tnv);
		tidyNodeGetValue(tdoc, node, &tnv);
		if (tnv.size)
			t->textval = cloneString((char *)tnv.bp);
/* Release unconditionally: tidy may have allocated storage even when the
 * reported size is 0, and an initialised buffer is always safe to free. */
		tidyBufFree(&tnv);
	}

/* first pass: count the attributes so the arrays can be sized */
	nattr = 0;
	tattr = tidyAttrFirst(node);
	while (tattr != NULL) {
		++nattr;
		tattr = tidyAttrNext(tattr);
	}

/* second pass: copy names and values, one extra slot for the 0 terminator */
	t->attributes = allocMem(sizeof(char *) * (nattr + 1));
	t->atvals = allocMem(sizeof(char *) * (nattr + 1));
	i = 0;
	tattr = tidyAttrFirst(node);
	while (tattr != NULL) {
		t->attributes[i] = cloneString(tidyAttrName(tattr));
		t->atvals[i] = cloneString(tidyAttrValue(tattr));
		++i;
		tattr = tidyAttrNext(tattr);
	}
	t->attributes[i] = 0;
	t->atvals[i] = 0;

/* innerHTML, only for certain tags */
	if (t->info->bits & TAG_INNERHTML) {
		TidyBuffer tnv;	/* serialised node text */
		tidyBufInit(&tnv);
		t->innerHTML = emptyString;
		tidyNodeGetText(tdoc, node, &tnv);
		if (tnv.size) {
/* But it's not the original html, it has been sanitized.
 * Put a cap on size, else memory consumed could, theoretically,
 * grow as the size of the document squared. */
			if (tnv.size <= 4096)
				t->innerHTML = cloneString((char *)tnv.bp);
			tagStrip(t->innerHTML);
		}
/* freed on every path, see the text-node case above */
		tidyBufFree(&tnv);
	}

}				/* convertNode */
Beispiel #4
0
/*
 * Debugging aid (strictly for debug level >= 5): dump one node of the tidy
 * tree to stdout.
 *
 * node     the tidy node being visited
 * level    depth of the node, echoed in the output
 * opentag  true when the node is being entered, false when it is being left
 *
 * A closing visit prints just "}".  An opening visit prints
 * "Node(level): name {", then the node value when the name is "Text", then
 * one "@name = value" line per attribute.
 *
 * The tidy document is taken from the file-level variable tdoc.
 */
static void printNode(TidyNode node, int level, bool opentag)
{
	ctmbstr name;
	TidyAttr tattr;

	if (!opentag) {
		puts("}");
		return;
	}

	switch (tidyNodeGetType(node)) {
	case TidyNode_Root:
		name = "Root";
		break;
	case TidyNode_DocType:
		name = "DOCTYPE";
		break;
	case TidyNode_Comment:
		name = "Comment";
		break;
	case TidyNode_ProcIns:
		name = "Processing Instruction";
		break;
	case TidyNode_Text:
		name = "Text";
		break;
	case TidyNode_CDATA:
		name = "CDATA";
		break;
	case TidyNode_Section:
		name = "XML Section";
		break;
	case TidyNode_Asp:
		name = "ASP";
		break;
	case TidyNode_Jste:
		name = "JSTE";
		break;
	case TidyNode_Php:
		name = "PHP";
		break;
	case TidyNode_XmlDecl:
		name = "XML Declaration";
		break;
	case TidyNode_Start:
	case TidyNode_End:
	case TidyNode_StartEnd:
	default:
		name = tidyNodeGetName(node);
		break;
	}
	assert(name != NULL);
	printf("Node(%d): %s {\n", level, ((char *)name));
/* the ifs could be combined with && */
	if (stringEqual(((char *)name), "Text")) {
		TidyBuffer tnv;	/* text-node value */
		tidyBufInit(&tnv);
		tidyNodeGetValue(tdoc, node, &tnv);
/* bp stays NULL when tidy wrote nothing; never hand NULL to %s */
		printf("Text: %s\n", tnv.bp ? (char *)tnv.bp : "");
/* free on every path, an initialised buffer is always safe to release */
		tidyBufFree(&tnv);
	}

/* Get the first attribute for the node */
	tattr = tidyAttrFirst(node);
	while (tattr != NULL) {
/* a missing value is shown as "(null)" rather than passing NULL to %s */
		ctmbstr aval = tidyAttrValue(tattr);
/* Print the node and its attribute */
		printf("@%s = %s\n", tidyAttrName(tattr), aval ? aval : "(null)");
/* Get the next attribute */
		tattr = tidyAttrNext(tattr);
	}
}				/* printNode */
Beispiel #5
0
/*
 * Walk the children of tnod, hand the URLs they reference to
 * add_new_url_and_check() and, when outfile is non-NULL, write a
 * re-serialised copy of the markup to it.
 *
 * tdoc     tidy document the nodes belong to
 * tnod     parent node whose children are processed (recursively)
 * elem     URL list entry being parsed; its level is compared with
 *          option_values.depth to decide which kinds of links are followed
 * indent   nesting depth, used only to indent tag lines in outfile
 * outfile  output stream, or NULL to collect URLs without writing anything
 *
 * Which attribute is inspected per element:
 *   A, AREA, MAP     -> HREF   (when get_html_link)
 *   FRAME, IFRAME    -> SRC    (when get_int_html_link)
 *   LINK             -> HREF   (when get_ext_depends)
 *   IMG, SCRIPT      -> SRC    (when get_ext_depends)
 *
 * When outfile is set, add_new_url_and_check() is given &relative_url and the
 * matching attribute is written with that value instead of the original one.
 *
 * Tag lines are indented by indent - 1 spaces (none at indent 0).  The "<" /
 * "</" markers are written literally so that they are never truncated.
 */
static void parse_html(TidyDoc tdoc, TidyNode tnod, const url_list_t *elem, int indent, FILE *outfile)
{
	TidyNode child;
	TidyAttr attr;
	TidyAttrId attr_id = TidyAttr_UNKNOWN;
	TidyNodeType node_type;
	TidyTagId node_id;
	ctmbstr name;
	ctmbstr value;
	char *url, *relative_url = NULL;
	int found = 0;
	/* number of spaces in front of a tag line */
	int pad = indent > 0 ? indent - 1 : 0;
	/* a depth of 0 means "no limit" for all three switches */
	int get_html_link = (!option_values.depth || elem->level < option_values.depth);
	int get_int_html_link = (!option_values.depth || elem->level < option_values.depth+1);
	int get_ext_depends = ((!option_values.depth || elem->level < option_values.depth+1)
		&& !option_values.no_html_dependencies);

	for (child = tidyGetChild(tnod); child; child = tidyGetNext(child)) {
		node_type = tidyNodeGetType(child);

		switch (node_type) {
			case TidyNode_Start:
			case TidyNode_StartEnd:
				/* pick the attribute that may carry a URL for this element */
				node_id = tidyNodeGetId(child);
				if (get_html_link && (node_id == TidyTag_A || node_id == TidyTag_AREA || node_id == TidyTag_MAP)) {
					found = 1;
					attr_id = TidyAttr_HREF;
				}
				else if (get_int_html_link && (node_id == TidyTag_FRAME || node_id == TidyTag_IFRAME)) {
					found = 1;
					attr_id = TidyAttr_SRC;
				}
				else if (get_ext_depends && node_id == TidyTag_LINK) {
					found = 1;
					attr_id = TidyAttr_HREF;
				}
				else if (get_ext_depends && (node_id == TidyTag_IMG || node_id == TidyTag_SCRIPT)) {
					found = 1;
					attr_id = TidyAttr_SRC;
				}
				else {
					found = 0;
					attr_id = TidyAttr_UNKNOWN;
				}

				if (found && (attr = tidyAttrGetById(child, attr_id)) != NULL) {
					url = (char *) tidyAttrValue(attr);

					string_free(relative_url);
					if (url && *url)
						add_new_url_and_check(elem, url, outfile ? &relative_url : NULL);
				}

				if (outfile && (name = tidyNodeGetName(child)) != NULL) {
					fprintf(outfile, "%*s<%s", pad, "", name);
					for (attr = tidyAttrFirst(child); attr; attr = tidyAttrNext(attr)) {
						fprintf(outfile, " %s", tidyAttrName(attr));
						if (relative_url && (tidyAttrGetId(attr) == attr_id))
							fprintf(outfile, "=\"%s\"", relative_url);
						else {
							/* a valueless attribute is written as ="" */
							value = tidyAttrValue(attr);
							fprintf(outfile, "=\"%s\"", value ? value : "");
						}
					}
					string_free(relative_url);

					if (node_type == TidyNode_StartEnd)
						fprintf(outfile, "/>\n");
					else {
						fprintf(outfile, ">\n");
						parse_html(tdoc, child, elem, indent + 1, outfile);
						fprintf(outfile, "%*s</%s>\n", pad, "", name);
					}
				}
				else {
					/* nothing to write for this element, still descend */
					string_free(relative_url);
					parse_html(tdoc, child, elem, indent + 1, outfile);
				}
				break;
			case TidyNode_End:
				if (outfile) {
					if ((name = tidyNodeGetName(child)) != NULL)
						fprintf(outfile, "%*s</%s>\n", pad, "", name);
				}
				break;
			case TidyNode_Text:
				if (outfile) {
					TidyBuffer buf;
					TidyTagId parent_node_id = tidyNodeGetId(tnod);

					tidyBufInit(&buf);
					/* SCRIPT and STYLE content is fetched with tidyNodeGetValue(),
					 * all other text with tidyNodeGetText() */
					if (parent_node_id == TidyTag_SCRIPT || parent_node_id == TidyTag_STYLE)
						tidyNodeGetValue(tdoc, child, &buf);
					else
						tidyNodeGetText(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "%s", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
			case TidyNode_Comment:
				if (outfile) {
					TidyBuffer buf;

					tidyBufInit(&buf);
					tidyNodeGetValue(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "<!--%s-->\n", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
			case TidyNode_CDATA:
				if (outfile) {
					TidyBuffer buf;

					tidyBufInit(&buf);
					tidyNodeGetValue(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "<![CDATA[%s]]>\n", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
			case TidyNode_DocType:
				if (outfile) {
					int pub = 0;

					/* never pass a NULL name to %s */
					name = tidyNodeGetName(child);
					fprintf(outfile, "<!DOCTYPE %s", name ? name : "");
					for (attr = tidyAttrFirst(child); attr; attr = tidyAttrNext(attr)) {
						/* attribute names are written up to and including
						 * PUBLIC; after that only the quoted values follow */
						if (!pub) {
							fprintf(outfile, " %s", tidyAttrName(attr));
							if (!string_casecmp(tidyAttrName(attr), "PUBLIC"))
								pub = 1;
						}
						if (tidyAttrValue(attr))
							fprintf(outfile, " \"%s\"", tidyAttrValue(attr));
					}
					fprintf(outfile, ">\n");
				}
				break;
			default:
				/* any other node type: copy its value through unchanged */
				if (outfile) {
					TidyBuffer buf;

					tidyBufInit(&buf);
					tidyNodeGetValue(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "%s", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
		}
	}
}