コード例 #1
0
ファイル: parse.c プロジェクト: ASpade/mulk
/*
 * Parse the HTML file `filename` with tidy and hand the resulting tree to
 * parse_html() together with `elem`.
 *
 * When option_values.save_relative_links is set and
 * option_values.disable_save_tree is not, the file is reopened for writing
 * and passed to parse_html() as the output stream; otherwise parse_html()
 * receives a NULL stream. Nothing is done beyond releasing the tidy
 * document if parsing or clean-and-repair reports an error (< 0).
 */
void parse_urls(const char *filename, const url_list_t *elem)
{
	TidyDoc doc = tidyCreate();
	FILE *rewritten = NULL;
	int rewrite_file;

	/* Tidy configuration. */
	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);
	tidyOptSetBool(doc, TidyHideEndTags, yes);
	tidyOptSetBool(doc, TidyDropEmptyParas, no);
	tidyOptSetBool(doc, TidyJoinStyles, no);
	tidyOptSetBool(doc, TidyPreserveEntities, yes);
	tidyOptSetInt(doc, TidyMergeDivs, no);
	tidyOptSetInt(doc, TidyMergeSpans, no);
	tidyOptSetInt(doc, TidyWrapLen, 4096);
	tidyOptSetValue(doc, TidyCharEncoding, "utf8");
	tidySetReportFilter(doc, filter_cb);

	/* Clean-and-repair is only attempted when parsing did not fail. */
	if (tidyParseFile(doc, filename) < 0 || tidyCleanAndRepair(doc) < 0)
		goto done;

	rewrite_file = option_values.save_relative_links
		&& !option_values.disable_save_tree;
	if (rewrite_file)
		rewritten = fopen(filename, "w");

	parse_html(doc, tidyGetRoot(doc), elem, 1, rewritten);

	if (rewritten)
		fclose(rewritten);

done:
	tidyRelease(doc);
}
コード例 #2
0
ファイル: html_text_buffer.c プロジェクト: ayttm/ayttm
/*
 * Append a line of text to the buffer
 *
 * text_view - view whose buffer receives the text.
 * txt       - text to append; it is run through convert_to_utf8() and the
 *             converted copy is released with g_free() before returning.
 * ignore    - flag word forwarded to parse_html(); when HTML_IGNORE_END is
 *             set the "real_end_mark" mark is kept for the next call.
 *
 * Line-ending handling before insertion:
 *   - text containing "<br>" (any case): every '\n' and '\r' becomes ' ';
 *   - otherwise, text containing '\r': each '\r' becomes ' ' when the text
 *     also contains '\n', or '\n' when it does not.
 *
 * If a "real_end_mark" mark already exists, everything from that mark to
 * the end of the buffer is deleted before the new text is inserted.
 */
void html_text_buffer_append(GtkTextView *text_view, char *txt, int ignore)
{
	gchar *text = convert_to_utf8(txt);
	GtkTextIter iter;
	GtkTextMark *insert_mark;

	GdkRectangle iter_loc;
	GdkRectangle visible_rect;

	GtkTextBuffer *buffer = gtk_text_view_get_buffer(text_view);

	if (strcasestr(text, "<br>")) {
		char *c = text;

		/*
		 * Resume each search at the character just replaced instead of
		 * rescanning from the start of the string every time.
		 */
		while ((c = strchr(c, '\n')) != 0)
			*c = ' ';
		c = text;
		while ((c = strchr(c, '\r')) != 0)
			*c = ' ';
	} else if (strchr(text, '\r')) {
		char *c = text;
		if (strchr(text, '\n')) {
			while ((c = strchr(c, '\r')) != 0)
				*c = ' ';
		} else {
			while ((c = strchr(c, '\r')) != 0)
				*c = '\n';
		}
	}

	gtk_text_buffer_get_end_iter(buffer, &iter);

	insert_mark = gtk_text_buffer_get_mark(buffer, "real_end_mark");

	if (insert_mark) {
		/* Drop whatever was added after the mark by a previous call. */
		GtkTextIter del;
		gtk_text_buffer_get_iter_at_mark(buffer, &del, insert_mark);
		gtk_text_buffer_delete(buffer, &del, &iter);
		gtk_text_buffer_get_end_iter(buffer, &iter);
	}
	else
		insert_mark = gtk_text_buffer_create_mark(buffer,
			"real_end_mark", &iter, TRUE);

	/* Decide first if we want to scroll the text to the end or not */
	gtk_text_view_get_iter_location(text_view, &iter, &iter_loc);
	gtk_text_view_get_visible_rect(text_view, &visible_rect);

	gtk_text_buffer_insert(buffer, &iter, text, -1);
	parse_html(text_view, *insert_mark, ignore);

	/*
	 * Scroll only if the insertion point (measured before inserting) was
	 * not below the bottom edge of the visible area.
	 */
	if (iter_loc.y <= visible_rect.y + visible_rect.height) {
		GtkTextMark *end_mark;

		gtk_text_buffer_get_end_iter(buffer, &iter);
		end_mark = gtk_text_buffer_create_mark(buffer, NULL, &iter,
			TRUE);

		gtk_text_view_scroll_mark_onscreen(text_view, end_mark);
		gtk_text_buffer_delete_mark(buffer, end_mark);
	}

	if (!(ignore & HTML_IGNORE_END))
		gtk_text_buffer_delete_mark(buffer, insert_mark);

	g_free(text);
}
コード例 #3
0
ファイル: html_tbl.c プロジェクト: ebichu/dd-wrt
/*
 * Lay out one HTML table and, when the part has an output target, draw it.
 *
 * attr      - attribute text of the table tag; the background colour,
 *             border, cellspacing, cellpadding, align, frame, rules and
 *             width are read from it.
 * html, eof - handed to parse_table() as the markup to parse.
 * end       - handed through to parse_table() (presumably receives the
 *             position where table parsing stopped -- TODO confirm there).
 * f         - the struct part being formatted, passed as void *.
 *
 * table_level is incremented on entry and decremented on exit; when it
 * returns to zero free_table_cache() is called.
 */
void format_table(unsigned char *attr, unsigned char *html, unsigned char *eof, unsigned char **end, void *f)
{
	struct part *p = f;
	int border, cellsp, vcellpd, cellpd, align;
	int frame, rules, width, wf;
	struct rgb bgcolor;
	struct table *t;
	char *al;
	int cye;
	int x;
	int i;
	/*int llm = last_link_to_move;*/
	struct s_e *bad_html;
	int bad_html_n;
	struct node *n, *nn;
	int cpd_pass, cpd_width, cpd_last;
	/*if (!p->data) {
		debug("nested tables not supported");
		return;
	}*/
	table_level++;
	/* Start from the paragraph background; get_bgcolor() is given the tag attributes to update it. */
	memcpy(&bgcolor, &par_format.bgcolor, sizeof(struct rgb));
	get_bgcolor(attr, &bgcolor);
	/* No numeric border: border becomes 1 if any of border/rules/frame is present, else 0. */
	if ((border = get_num(attr, "border")) == -1) border = has_attr(attr, "border") || has_attr(attr, "rules") || has_attr(attr, "frame");
	/*if (!border) border = 1;*/

	if ((cellsp = get_num(attr, "cellspacing")) == -1) cellsp = 1;
	/* Cell padding is reduced to 0/1 flags (horizontal and vertical). */
	if ((cellpd = get_num(attr, "cellpadding")) == -1) {
		vcellpd = 0;
		cellpd = !!border;
	} else {
		vcellpd = cellpd >= HTML_CHAR_HEIGHT / 2 + 1;
		cellpd = cellpd >= HTML_CHAR_WIDTH / 2 + 1;
	}
	/* Without a border there is no spacing; with one, spacing is at least 1. Both are capped at 2. */
	if (!border) cellsp = 0;
	else if (!cellsp) cellsp = 1;
	if (border > 2) border = 2;
	if (cellsp > 2) cellsp = 2;
	/* Alignment: inherit from the paragraph (none/block -> left), then let the align attribute override. */
	align = par_format.align;
	if (align == AL_NO || align == AL_BLOCK) align = AL_LEFT;
	if ((al = get_attr_val(attr, "align"))) {
		if (!strcasecmp(al, "left")) align = AL_LEFT;
		if (!strcasecmp(al, "center")) align = AL_CENTER;
		if (!strcasecmp(al, "right")) align = AL_RIGHT;
		mem_free(al);
	}
	/* Outer frame: defaults to a full box; unknown values leave the default. */
	frame = F_BOX;
	if ((al = get_attr_val(attr, "frame"))) {
		if (!strcasecmp(al, "void")) frame = F_VOID;
		if (!strcasecmp(al, "above")) frame = F_ABOVE;
		if (!strcasecmp(al, "below")) frame = F_BELOW;
		if (!strcasecmp(al, "hsides")) frame = F_HSIDES;
		if (!strcasecmp(al, "vsides")) frame = F_VSIDES;
		if (!strcasecmp(al, "lhs")) frame = F_LHS;
		if (!strcasecmp(al, "rhs")) frame = F_RHS;
		if (!strcasecmp(al, "box")) frame = F_BOX;
		if (!strcasecmp(al, "border")) frame = F_BOX;
		mem_free(al);
	}
	/* Inner rules: all when there is a border, none otherwise, unless the rules attribute says differently. */
	rules = border ? R_ALL : R_NONE;
	if ((al = get_attr_val(attr, "rules"))) {
		if (!strcasecmp(al, "none")) rules = R_NONE;
		if (!strcasecmp(al, "groups")) rules = R_GROUPS;
		if (!strcasecmp(al, "rows")) rules = R_ROWS;
		if (!strcasecmp(al, "cols")) rules = R_COLS;
		if (!strcasecmp(al, "all")) rules = R_ALL;
		mem_free(al);
	}
	if (!border) frame = F_VOID;
	/* wf == 1 records that no width was obtained from the tag and the paragraph's inner width is used instead. */
	wf = 0;
	if ((width = get_width(attr, "width", p->data || p->xp)) == -1) {
		width = par_format.width - par_format.leftmargin - par_format.rightmargin;
		if (width < 0) width = 0;
		wf = 1;
	}
	/* On parse failure only bad_html is released; the table free and stack pop at ret2 are skipped. */
	if (!(t = parse_table(html, eof, end, &bgcolor, p->data || p->xp, &bad_html, &bad_html_n))) {
		mem_free(bad_html);
		goto ret0;
	}
	/*
	 * bad_html holds bad_html_n [s, e) spans reported by parse_table().
	 * Each is trimmed of surrounding whitespace and, if anything remains,
	 * formatted into the same part via parse_html().
	 * NOTE(review): presumably markup found inside the table but outside
	 * any cell -- confirm against parse_table().
	 */
	for (i = 0; i < bad_html_n; i++) {
		while (bad_html[i].s < bad_html[i].e && WHITECHAR(*bad_html[i].s)) bad_html[i].s++;
		while (bad_html[i].s < bad_html[i].e && WHITECHAR(bad_html[i].e[-1])) bad_html[i].e--;
		if (bad_html[i].s < bad_html[i].e) parse_html(bad_html[i].s, bad_html[i].e, put_chars_f, line_break_f, special_f, p, NULL);
	}
	mem_free(bad_html);
	/* Work on a duplicated HTML stack entry; it is removed again at ret2. */
	html_stack_dup();
	html_top.dontkill = 1;
	par_format.align = AL_LEFT;
	t->p = p;
	t->border = border;
	t->cellpd = cellpd;
	t->vcellpd = vcellpd;
	t->cellsp = cellsp;
	t->frame = frame;
	t->rules = rules;
	t->width = width;
	t->wf = wf;
	/*
	 * Cell-padding retry state:
	 *   cpd_pass 0 - first measurement with the requested padding;
	 *   cpd_pass 1 - re-measured with padding 0 because the table was too wide;
	 *   cpd_pass 2 - padding restored because dropping it made min_t larger.
	 */
	cpd_pass = 0;
	cpd_last = t->cellpd;
	cpd_width = 0;	/* not needed, but let the warning go away */
	again:
	get_cell_widths(t);
	if (get_column_widths(t)) goto ret2;
	get_table_width(t);
	/*
	 * No output target and xp == 0: only widen p->xmax / p->x from the
	 * table's max/min widths plus margins, then clean up.
	 * NOTE(review): looks like a width-measuring pass -- confirm with callers.
	 */
	if (!p->data && !p->xp) {
		if (!wf && t->max_t > width) t->max_t = width;
		if (t->max_t < t->min_t) t->max_t = t->min_t;
		if (t->max_t + par_format.leftmargin + par_format.rightmargin > p->xmax) p->xmax = t->max_t + par_format.leftmargin + par_format.rightmargin;
		if (t->min_t + par_format.leftmargin + par_format.rightmargin > p->x) p->x = t->min_t + par_format.leftmargin + par_format.rightmargin;
		goto ret2;
	}
	/* Too wide with padding: measure again without it, remembering the padded minimum width. */
	if (!cpd_pass && t->min_t > width && t->cellpd) {
		t->cellpd = 0;
		cpd_pass = 1;
		cpd_width = t->min_t;
		goto again;
	}
	/* Removing the padding made the minimum width larger: put it back and measure once more. */
	if (cpd_pass == 1 && t->min_t > cpd_width) {
		t->cellpd = cpd_last;
		cpd_pass = 2;
		goto again;
	}
	/*debug("%d %d %d", t->min_t, t->max_t, width);*/
	/* Target width: min_t if it does not fit, max_t if it fits and no width was given, else the given width. */
	if (t->min_t >= width) distribute_widths(t, t->min_t);
	else if (t->max_t < width && wf) distribute_widths(t, t->max_t);
	else distribute_widths(t, width);
	/* No output target and xp == 1: record the width in p->x, advance p->cy by t->rh, draw nothing. */
	if (!p->data && p->xp == 1) {
		int ww = t->rw + par_format.leftmargin + par_format.rightmargin;
		if (ww > par_format.width) ww = par_format.width;
		if (ww < t->rw) ww = t->rw;
		if (ww > p->x) p->x = ww;
		p->cy += t->rh;
		goto ret2;
	}
#ifdef HTML_TABLE_2ND_PASS
	check_table_widths(t);
#endif
	/* Horizontal position from the alignment, then clamped so the table ends within par_format.width and x >= 0. */
	x = par_format.leftmargin;
	if (align == AL_CENTER) x = (par_format.width + par_format.leftmargin - par_format.rightmargin - t->rw) / 2;
	if (align == AL_RIGHT) x = par_format.width - par_format.rightmargin - t->rw;
	if (x + t->rw > par_format.width) x = par_format.width - t->rw;
	if (x < 0) x = 0;
	/*display_table(t, x, p->cy, &cye);*/
	get_table_heights(t);
	/* Still no output target: account for the table's size in p->x / p->cy and skip drawing. */
	if (!p->data) {
		if (t->rw + par_format.leftmargin + par_format.rightmargin > p->x) p->x = t->rw + par_format.leftmargin + par_format.rightmargin;
		p->cy += t->rh;
		goto ret2;
	}
	/* Set yw of the first node in the list from the current position, draw the table, then add a new node starting at the table's end row (cye). */
	n = p->data->nodes.next;
	n->yw = p->yp - n->y + p->cy;
	display_complicated_table(t, x, p->cy, &cye);
	display_table_frames(t, x, p->cy);
	nn = mem_alloc(sizeof(struct node));
	nn->x = n->x;
	nn->y = p->yp + cye;
	nn->xw = n->xw;
	add_to_list(p->data->nodes, nn);
	/*sdbg(p->data);*/
	/*for (y = p->cy; y < cye; y++) {
		last_link_to_move = llm;
		align_line(p, y);
	}*/
	/*if (p->cy + t->rh != cye) internal("size does not match; 1:%d, 2:%d", p->cy + t->rh, cye);*/
	p->cy = cye;
	p->cx = -1;

	/* Common exit once a table exists: copy link_num back, extend p->y, free the table, pop the duplicated stack entry. */
	ret2:
	p->link_num = t->link_num;
	if (p->cy > p->y) p->y = p->cy;
	/*ret1:*/
	free_table(t);
	kill_html_stack_item(&html_top);
	/* Exit used when parse_table() failed: only the nesting counter is unwound. */
	ret0:
	/*ret:*/
	table_level--;
	if (!table_level) free_table_cache();
}
コード例 #4
0
ファイル: html_r.c プロジェクト: ebichu/dd-wrt
void do_format(char *start, char *end, struct part *part, unsigned char *head)
{
	parse_html(start, end, (int (*)(void *, unsigned char *, int)) put_chars_conv, (void (*)(void *)) line_break, (void *(*)(void *, int, ...)) html_special, part, head);
	/*if ((part->y -= line_breax) < 0) part->y = 0;*/
}
コード例 #5
0
ファイル: parse.c プロジェクト: ASpade/mulk
static void parse_html(TidyDoc tdoc, TidyNode tnod, const url_list_t *elem, int indent, FILE *outfile)
{
	TidyNode child;
	TidyAttr attr;
	TidyAttrId attr_id = TidyAttr_UNKNOWN;
	TidyNodeType node_type;
	TidyTagId node_id;
	ctmbstr name;
	char *url, *relative_url = NULL;
	int found = 0;
	int get_html_link = (!option_values.depth || elem->level < option_values.depth);
	int get_int_html_link = (!option_values.depth || elem->level < option_values.depth+1);
	int get_ext_depends = ((!option_values.depth || elem->level < option_values.depth+1)
		&& !option_values.no_html_dependencies);

	for (child = tidyGetChild(tnod); child; child = tidyGetNext(child)) {
		node_type = tidyNodeGetType(child);

		switch (node_type) {
			case TidyNode_Start:
			case TidyNode_StartEnd:
				node_id = tidyNodeGetId(child);
				if (get_html_link && (node_id == TidyTag_A || node_id == TidyTag_AREA || node_id == TidyTag_MAP)) {
					found = 1;
					attr_id = TidyAttr_HREF;
				}
				else if (get_int_html_link && (node_id == TidyTag_FRAME || node_id == TidyTag_IFRAME)) {
					found = 1;
					attr_id = TidyAttr_SRC; 
				}
				else if (get_ext_depends) {
					if (node_id == TidyTag_LINK) {
						found = 1;
						attr_id = TidyAttr_HREF;
					}
					else if (node_id == TidyTag_IMG || node_id == TidyTag_SCRIPT) {
						found = 1;
						attr_id = TidyAttr_SRC; 
					}
					else {
						found = 0;
						attr_id = TidyAttr_UNKNOWN;
					}
				}
				else {
					found = 0;
					attr_id = TidyAttr_UNKNOWN;
				}

				if (found && (attr = tidyAttrGetById(child, attr_id)) != NULL) {
					url = (char *) tidyAttrValue(attr);

					string_free(relative_url);
					if (url && *url)
						add_new_url_and_check(elem, url, outfile ? &relative_url : NULL);
				}

				if (outfile && (name = tidyNodeGetName(child)) != NULL) {
					fprintf(outfile, "%*.*s%s", indent, indent, "<", name);
					for (attr = tidyAttrFirst(child); attr; attr = tidyAttrNext(attr)) {
						fprintf(outfile, " %s", tidyAttrName(attr));
						if (relative_url && (tidyAttrGetId(attr) == attr_id))
							fprintf(outfile, "=\"%s\"", relative_url);
						else if (tidyAttrValue(attr))
							fprintf(outfile, "=\"%s\"", tidyAttrValue(attr) ? tidyAttrValue(attr) : "");
						else
							fprintf(outfile, "=\"\"");
					}
					string_free(relative_url);

					if (node_type == TidyNode_StartEnd)
						fprintf(outfile, "/>\n");
					else {
						fprintf(outfile, ">\n");
						parse_html(tdoc, child, elem, indent + 1, outfile);
						fprintf(outfile, "%*.*s%s>\n", indent + 1, indent + 1, "</", name);
					}
				}
				else {
					string_free(relative_url);
					parse_html(tdoc, child, elem, indent + 1, outfile);
				}
				break;
			case TidyNode_End:
				if (outfile) {
					if ((name = tidyNodeGetName(child)) != NULL)
						fprintf(outfile, "%*.*s/%s>\n", indent, indent, "<", name);
				}
				break;
			case TidyNode_Text:
				if (outfile) {
					TidyBuffer buf;
					TidyTagId parent_node_id = tidyNodeGetId(tnod);

					tidyBufInit(&buf);
					if (parent_node_id == TidyTag_SCRIPT || parent_node_id == TidyTag_STYLE)
						tidyNodeGetValue(tdoc, child, &buf);
					else
						tidyNodeGetText(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "%s", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
			case TidyNode_Comment:
				if (outfile) {
					TidyBuffer buf;

					tidyBufInit(&buf);
					tidyNodeGetValue(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "<!--%s-->\n", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
			case TidyNode_CDATA:
				if (outfile) {
					TidyBuffer buf;

					tidyBufInit(&buf);
					tidyNodeGetValue(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "<![CDATA[%s]]>\n", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
			case TidyNode_DocType:
				if (outfile) {
					int pub = 0;

					fprintf(outfile, "<!DOCTYPE %s", tidyNodeGetName(child));
					for (attr = tidyAttrFirst(child); attr; attr = tidyAttrNext(attr)) {
						if (!pub) {
							fprintf(outfile, " %s", tidyAttrName(attr));
							if (!string_casecmp(tidyAttrName(attr), "PUBLIC"))
								pub = 1;
						}
						if (tidyAttrValue(attr))
							fprintf(outfile, " \"%s\"", tidyAttrValue(attr));
					}
					fprintf(outfile, ">\n");
				}
				break;
			default:
				if (outfile) {
					TidyBuffer buf;

					tidyBufInit(&buf);
					tidyNodeGetValue(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "%s", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
		}
	}
}