Esempio n. 1
0
/* Traverse the document tree */
void dumpNode(TidyDoc doc, TidyNode tnod, int indent )
{
  TidyNode child;
  for ( child = tidyGetChild(tnod); child; child = tidyGetNext(child) )
  {
    ctmbstr name = tidyNodeGetName( child );
    if ( name )
    {
      /* if it has a name, then it's an HTML tag ... */
      TidyAttr attr;
      printf( "%*.*s%s ", indent, indent, "<", name);
      /* walk the attribute list */
      for ( attr=tidyAttrFirst(child); attr; attr=tidyAttrNext(attr) ) {
        printf(tidyAttrName(attr));
        tidyAttrValue(attr)?printf("=\"%s\" ",
                                   tidyAttrValue(attr)):printf(" ");
      }
      printf( ">\n");
    }
    else {
      /* if it doesn't have a name, then it's probably text, cdata, etc... */
      TidyBuffer buf;
      tidyBufInit(&buf);
      tidyNodeGetText(doc, child, &buf);
      printf("%*.*s\n", indent, indent, buf.bp?(char *)buf.bp:"");
      tidyBufFree(&buf);
    }
    dumpNode( doc, child, indent + 4 ); /* recursive */
  }
}
Esempio n. 2
0
nuiHTMLAttrib::nuiHTMLAttrib(const void* _tattrib, nglTextEncoding encoding)
{
  TidyAttr tattr = (TidyAttr)_tattrib;
  mType = (AttributeType)tidyAttrGetId(tattr);
  mName.Import(tidyAttrName(tattr), encoding);
  mValue.Import(tidyAttrValue(tattr), encoding);
}
Esempio n. 3
0
static GHashTable* html_get_attributes(TidyNode node) {
	TidyAttr curr_attr = tidyAttrFirst(node);
	GHashTable* attrs = g_hash_table_new(g_str_hash, g_str_equal);
	gchar* canonical_name = NULL, * value = NULL;

	while(curr_attr) {
		canonical_name = g_utf8_strdown(tidyAttrName(curr_attr), -1);
		value = g_strdup(tidyAttrValue(curr_attr));
		g_hash_table_insert(attrs, canonical_name, value);
		curr_attr = tidyAttrNext(curr_attr);
	}

	return attrs;
}
Esempio n. 4
0
DFNode *fromTidyNode(DFDocument *htmlDoc, TidyDoc tdoc, TidyNode tnode)
{
    switch (tidyNodeGetType(tnode)) {
        case TidyNode_Text: {
            char *value = copyTidyNodeValue(tnode,tdoc);
            DFNode *result = DFCreateTextNode(htmlDoc,value);
            free(value);
            return result;
        }
        case TidyNode_CDATA:
            break;
        case TidyNode_Comment:
            break;
        case TidyNode_Root:
            printf("Have root\n");
            break;
        default: {
            const char *name = tidyNodeGetName(tnode);
            if (name == NULL) {
                printf("NULL name for %p, type %d\n",tnode,tidyNodeGetType(tnode));
                return NULL;
            }
            const NamespaceDecl *namespaceDecl = DFNameMapNamespaceForID(htmlDoc->map,NAMESPACE_HTML);
            Tag tag = DFNameMapTagForName(htmlDoc->map,namespaceDecl->namespaceURI,name);
            DFNode *element = DFCreateElement(htmlDoc,tag);

            for (TidyAttr tattr = tidyAttrFirst(tnode); tattr != NULL; tattr = tidyAttrNext(tattr)) {
                const char *name = tidyAttrName(tattr);
                const char *value = tidyAttrValue(tattr);
                if (value == NULL) // Can happen in case of the empty string
                    value = "";;
                Tag attrTag = DFNameMapTagForName(htmlDoc->map,namespaceDecl->namespaceURI,name);
                DFSetAttribute(element,attrTag,value);
            }

            for (TidyNode tchild = tidyGetChild(tnode); tchild != NULL; tchild = tidyGetNext(tchild)) {
                DFNode *child = fromTidyNode(htmlDoc,tdoc,tchild);
                if (child != NULL)
                    DFAppendChild(element,child);
            }
            return element;
        }
    }
    return NULL;
}
Esempio n. 5
0
static void tidy_add_default_properties(PHPTidyObj *obj, tidy_obj_type type)
{

	TidyBuffer buf;
	TidyAttr	tempattr;
	TidyNode	tempnode;
	zval attribute, children, temp;
	PHPTidyObj *newobj;

	switch(type) {

		case is_node:
			if (!obj->std.properties) {
				rebuild_object_properties(&obj->std);
			}
			tidyBufInit(&buf);
			tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
			ADD_PROPERTY_STRINGL(obj->std.properties, value, buf.bp, buf.size ? buf.size-1 : 0);
			tidyBufFree(&buf);

			ADD_PROPERTY_STRING(obj->std.properties, name, tidyNodeGetName(obj->node));
			ADD_PROPERTY_LONG(obj->std.properties, type, tidyNodeGetType(obj->node));
			ADD_PROPERTY_LONG(obj->std.properties, line, tidyNodeLine(obj->node));
			ADD_PROPERTY_LONG(obj->std.properties, column, tidyNodeColumn(obj->node));
			ADD_PROPERTY_BOOL(obj->std.properties, proprietary, tidyNodeIsProp(obj->ptdoc->doc, obj->node));

			switch(tidyNodeGetType(obj->node)) {
				case TidyNode_Root:
				case TidyNode_DocType:
				case TidyNode_Text:
				case TidyNode_Comment:
					break;

				default:
					ADD_PROPERTY_LONG(obj->std.properties, id, tidyNodeGetId(obj->node));
			}

			tempattr = tidyAttrFirst(obj->node);

			if (tempattr) {
				char *name, *val;
				array_init(&attribute);

				do {
					name = (char *)tidyAttrName(tempattr);
					val = (char *)tidyAttrValue(tempattr);
					if (name && val) {
						add_assoc_string(&attribute, name, val);
					}
				} while((tempattr = tidyAttrNext(tempattr)));
			} else {
				ZVAL_NULL(&attribute);
			}
			zend_hash_str_update(obj->std.properties, "attribute", sizeof("attribute") - 1, &attribute);

			tempnode = tidyGetChild(obj->node);

			if (tempnode) {
				array_init(&children);
				do {
					tidy_instanciate(tidy_ce_node, &temp);
					newobj = Z_TIDY_P(&temp);
					newobj->node = tempnode;
					newobj->type = is_node;
					newobj->ptdoc = obj->ptdoc;
					newobj->ptdoc->ref_count++;

					tidy_add_default_properties(newobj, is_node);
					add_next_index_zval(&children, &temp);

				} while((tempnode = tidyGetNext(tempnode)));

			} else {
				ZVAL_NULL(&children);
			}

			zend_hash_str_update(obj->std.properties, "child", sizeof("child") - 1, &children);

			break;

		case is_doc:
			if (!obj->std.properties) {
				rebuild_object_properties(&obj->std);
			}
			ADD_PROPERTY_NULL(obj->std.properties, errorBuffer);
			ADD_PROPERTY_NULL(obj->std.properties, value);
			break;
	}
}
Esempio n. 6
0
static void convertNode(TidyNode node, int level, bool opentag)
{
	ctmbstr name;
	TidyAttr tattr;
	struct htmlTag *t;
	int nattr;		/* number of attributes */
	int i;

	switch (tidyNodeGetType(node)) {
	case TidyNode_Text:
		name = "Text";
		break;
	case TidyNode_Start:
	case TidyNode_End:
	case TidyNode_StartEnd:
		name = tidyNodeGetName(node);
		break;
	default:
		return;
	}

	t = newTag((char *)name);
	if (!t)
		return;

	if (!opentag) {
		t->slash = true;
		return;
	}

/* if a js script, remember the line number for error messages */
	if (t->action == TAGACT_SCRIPT)
		t->js_ln = tidyNodeLine(node);

/* this is the open tag, set the attributes */
/* special case for text tag */
	if (t->action == TAGACT_TEXT) {
		TidyBuffer tnv = { 0 };	/* text-node value */
		tidyBufClear(&tnv);
		tidyNodeGetValue(tdoc, node, &tnv);
		if (tnv.size) {
			t->textval = cloneString(tnv.bp);
			tidyBufFree(&tnv);
		}
	}

	nattr = 0;
	tattr = tidyAttrFirst(node);
	while (tattr != NULL) {
		++nattr;
		tattr = tidyAttrNext(tattr);
	}

	t->attributes = allocMem(sizeof(char *) * (nattr + 1));
	t->atvals = allocMem(sizeof(char *) * (nattr + 1));
	i = 0;
	tattr = tidyAttrFirst(node);
	while (tattr != NULL) {
		t->attributes[i] = cloneString(tidyAttrName(tattr));
		t->atvals[i] = cloneString(tidyAttrValue(tattr));
		++i;
		tattr = tidyAttrNext(tattr);
	}
	t->attributes[i] = 0;
	t->atvals[i] = 0;

/* innerHTML, only for certain tags */
	if (t->info->bits & TAG_INNERHTML) {
		TidyBuffer tnv = { 0 };	/* text-node value */
		tidyBufClear(&tnv);
		t->innerHTML = emptyString;
		tidyNodeGetText(tdoc, node, &tnv);
		if (tnv.size) {
/* But it's not the original html, it has been sanitized.
 * Put a cap on size, else memory consumed could, theoretically,
 * grow as the size of the document squared. */
			if (tnv.size <= 4096)
				t->innerHTML = cloneString(tnv.bp);
			tagStrip(t->innerHTML);
			tidyBufFree(&tnv);
		}
	}

}				/* convertNode */
Esempio n. 7
0
/* this is strictly for debugging, level >= 5 */
static void printNode(TidyNode node, int level, bool opentag)
{
	ctmbstr name;
	TidyAttr tattr;

	if (!opentag) {
		puts("}");
		return;
	}

	switch (tidyNodeGetType(node)) {
	case TidyNode_Root:
		name = "Root";
		break;
	case TidyNode_DocType:
		name = "DOCTYPE";
		break;
	case TidyNode_Comment:
		name = "Comment";
		break;
	case TidyNode_ProcIns:
		name = "Processing Instruction";
		break;
	case TidyNode_Text:
		name = "Text";
		break;
	case TidyNode_CDATA:
		name = "CDATA";
		break;
	case TidyNode_Section:
		name = "XML Section";
		break;
	case TidyNode_Asp:
		name = "ASP";
		break;
	case TidyNode_Jste:
		name = "JSTE";
		break;
	case TidyNode_Php:
		name = "PHP";
		break;
	case TidyNode_XmlDecl:
		name = "XML Declaration";
		break;
	case TidyNode_Start:
	case TidyNode_End:
	case TidyNode_StartEnd:
	default:
		name = tidyNodeGetName(node);
		break;
	}
	assert(name != NULL);
	printf("Node(%d): %s {\n", level, ((char *)name));
/* the ifs could be combined with && */
	if (stringEqual(((char *)name), "Text")) {
		TidyBuffer tnv = { 0 };	/* text-node value */
		tidyBufClear(&tnv);
		tidyNodeGetValue(tdoc, node, &tnv);
		printf("Text: %s\n", tnv.bp);
		if (tnv.size)
			tidyBufFree(&tnv);
	}

/* Get the first attribute for the node */
	tattr = tidyAttrFirst(node);
	while (tattr != NULL) {
/* Print the node and its attribute */
		printf("@%s = %s\n", tidyAttrName(tattr), tidyAttrValue(tattr));
/* Get the next attribute */
		tattr = tidyAttrNext(tattr);
	}
}				/* printNode */
Esempio n. 8
0
File: parse.c Progetto: ASpade/mulk
static void parse_html(TidyDoc tdoc, TidyNode tnod, const url_list_t *elem, int indent, FILE *outfile)
{
	TidyNode child;
	TidyAttr attr;
	TidyAttrId attr_id = TidyAttr_UNKNOWN;
	TidyNodeType node_type;
	TidyTagId node_id;
	ctmbstr name;
	char *url, *relative_url = NULL;
	int found = 0;
	int get_html_link = (!option_values.depth || elem->level < option_values.depth);
	int get_int_html_link = (!option_values.depth || elem->level < option_values.depth+1);
	int get_ext_depends = ((!option_values.depth || elem->level < option_values.depth+1)
		&& !option_values.no_html_dependencies);

	for (child = tidyGetChild(tnod); child; child = tidyGetNext(child)) {
		node_type = tidyNodeGetType(child);

		switch (node_type) {
			case TidyNode_Start:
			case TidyNode_StartEnd:
				node_id = tidyNodeGetId(child);
				if (get_html_link && (node_id == TidyTag_A || node_id == TidyTag_AREA || node_id == TidyTag_MAP)) {
					found = 1;
					attr_id = TidyAttr_HREF;
				}
				else if (get_int_html_link && (node_id == TidyTag_FRAME || node_id == TidyTag_IFRAME)) {
					found = 1;
					attr_id = TidyAttr_SRC; 
				}
				else if (get_ext_depends) {
					if (node_id == TidyTag_LINK) {
						found = 1;
						attr_id = TidyAttr_HREF;
					}
					else if (node_id == TidyTag_IMG || node_id == TidyTag_SCRIPT) {
						found = 1;
						attr_id = TidyAttr_SRC; 
					}
					else {
						found = 0;
						attr_id = TidyAttr_UNKNOWN;
					}
				}
				else {
					found = 0;
					attr_id = TidyAttr_UNKNOWN;
				}

				if (found && (attr = tidyAttrGetById(child, attr_id)) != NULL) {
					url = (char *) tidyAttrValue(attr);

					string_free(relative_url);
					if (url && *url)
						add_new_url_and_check(elem, url, outfile ? &relative_url : NULL);
				}

				if (outfile && (name = tidyNodeGetName(child)) != NULL) {
					fprintf(outfile, "%*.*s%s", indent, indent, "<", name);
					for (attr = tidyAttrFirst(child); attr; attr = tidyAttrNext(attr)) {
						fprintf(outfile, " %s", tidyAttrName(attr));
						if (relative_url && (tidyAttrGetId(attr) == attr_id))
							fprintf(outfile, "=\"%s\"", relative_url);
						else if (tidyAttrValue(attr))
							fprintf(outfile, "=\"%s\"", tidyAttrValue(attr) ? tidyAttrValue(attr) : "");
						else
							fprintf(outfile, "=\"\"");
					}
					string_free(relative_url);

					if (node_type == TidyNode_StartEnd)
						fprintf(outfile, "/>\n");
					else {
						fprintf(outfile, ">\n");
						parse_html(tdoc, child, elem, indent + 1, outfile);
						fprintf(outfile, "%*.*s%s>\n", indent + 1, indent + 1, "</", name);
					}
				}
				else {
					string_free(relative_url);
					parse_html(tdoc, child, elem, indent + 1, outfile);
				}
				break;
			case TidyNode_End:
				if (outfile) {
					if ((name = tidyNodeGetName(child)) != NULL)
						fprintf(outfile, "%*.*s/%s>\n", indent, indent, "<", name);
				}
				break;
			case TidyNode_Text:
				if (outfile) {
					TidyBuffer buf;
					TidyTagId parent_node_id = tidyNodeGetId(tnod);

					tidyBufInit(&buf);
					if (parent_node_id == TidyTag_SCRIPT || parent_node_id == TidyTag_STYLE)
						tidyNodeGetValue(tdoc, child, &buf);
					else
						tidyNodeGetText(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "%s", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
			case TidyNode_Comment:
				if (outfile) {
					TidyBuffer buf;

					tidyBufInit(&buf);
					tidyNodeGetValue(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "<!--%s-->\n", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
			case TidyNode_CDATA:
				if (outfile) {
					TidyBuffer buf;

					tidyBufInit(&buf);
					tidyNodeGetValue(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "<![CDATA[%s]]>\n", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
			case TidyNode_DocType:
				if (outfile) {
					int pub = 0;

					fprintf(outfile, "<!DOCTYPE %s", tidyNodeGetName(child));
					for (attr = tidyAttrFirst(child); attr; attr = tidyAttrNext(attr)) {
						if (!pub) {
							fprintf(outfile, " %s", tidyAttrName(attr));
							if (!string_casecmp(tidyAttrName(attr), "PUBLIC"))
								pub = 1;
						}
						if (tidyAttrValue(attr))
							fprintf(outfile, " \"%s\"", tidyAttrValue(attr));
					}
					fprintf(outfile, ">\n");
				}
				break;
			default:
				if (outfile) {
					TidyBuffer buf;

					tidyBufInit(&buf);
					tidyNodeGetValue(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "%s", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
		}
	}
}