Пример #1
/*
 * del_token_fifo: release the storage referenced by the token list tf.
 *   tf  token list whose tokens (tf->t[0 .. tf->nt-1]) are released
 * The structure pointed to by tf itself is not freed here.
 * NOTE(review): the brace lines delimiting the function body are not
 * present in this excerpt.
 */
static void del_token_fifo(struct token_fifo *tf)
	size_t i;

	/* free the name of every token whose type satisfies S_TOKEN() */
	for (i = 0; i < tf->nt; i ++)
		if (S_TOKEN(tf->t[i].type)) freemem(tf->t[i].name);
	/* the token array is freed only when the list is not empty */
	if (tf->nt) freemem(tf->t);
Пример #2
/*
 * Send a token to the output (a token_fifo in lexer mode, the output
 * buffer in stand-alone mode).
 */
/*
 * print_token: emit the token t on behalf of the lexer state ls.
 *   ls       lexer state; its flags select the output path
 *   t        token to emit; t->line may be overwritten (see below)
 *   uz_line  when non-zero, replaces a negative t->line
 * NOTE(review): the brace lines and the lines closing each branch are
 * not present in this excerpt, so where the LEXER branch ends (and
 * whether it returns before the character output at the bottom) cannot
 * be confirmed from here.
 */
void print_token(struct lexer_state *ls, struct token *t, long uz_line)
	char *x = t->name;

	/* a token with a negative line number takes uz_line, if one is given */
	if (uz_line && t->line < 0) t->line = uz_line;
	if (ls->flags & LEXER) {
		struct token at;

		/* work on a copy, so that t keeps its own name pointer */
		at = *t;
		if (S_TOKEN(t->type)) {
			/* the copy gets a duplicated name, which is then handed
			   to throw_away() together with ls->gf */
			at.name = sdup(at.name);
			throw_away(ls->gf, at.name);
		/* append the copy to ls->output_fifo (the remaining arguments
		   of this call are cut off in this excerpt) */
		aol(ls->output_fifo->t, ls->output_fifo->nt, at,
	if (ls->flags & KEEP_OUTPUT) {
		/* emit newlines while ls->oline is behind ls->line; presumably
		   put_char() advances ls->oline -- verify */
		for (; ls->oline < ls->line;) put_char(ls, '\n');
	/* a token whose type fails S_TOKEN() is spelled from operators_name[] */
	if (!S_TOKEN(t->type)) x = operators_name[t->type];
	/* write the spelling one character at a time */
	for (; *x; x ++) put_char(ls, *x);
Пример #3
/*
 * Print the content of a macro, in #define form.
 */
/*
 * print_macro: write the macro vm to emit_output as a #define line.
 *   vm  pointer to a struct macro, received as void *
 * NOTE(review): the brace lines, at least one `else` and the lines
 * closing the early branches are not present in this excerpt; the
 * fprintf() call for special macros is also cut off.
 */
static void print_macro(void *vm)
	struct macro *m = vm;
	char *mname = HASH_ITEM_NAME(m);
	int x = check_special_macro(mname);
	size_t i;

	/* a name for which check_special_macro() does not return MAC_NONE
	   is printed inside a comment */
	if (x != MAC_NONE) {
		fprintf(emit_output, "/* #define %s */ /* special */\n",
	fprintf(emit_output, "#define %s", mname);
	/* a non-negative narg means a parenthesised parameter list is printed */
	if (m->narg >= 0) {
		fprintf(emit_output, "(");
		for (i = 0; i < (size_t)(m->narg); i ++) {
			/* every parameter after the first is preceded by ", " */
			fprintf(emit_output, i ? ", %s" : "%s", m->arg[i]);
		if (m->vaarg) {
			/* the ellipsis follows the named parameters, if any */
			fputs(m->narg ? ", ..." : "...", emit_output);
		fprintf(emit_output, ")");
	/* empty replacement list: the line is ended here (the rest of this
	   branch is not visible in the excerpt) */
	if (m->cval.length == 0) {
		fputc('\n', emit_output);
	fputc(' ', emit_output);
	/* walk the stored replacement list; every entry starts with one
	   byte holding the token type */
	for (i = 0; i < m->cval.length;) {
		int tt = m->cval.t[i ++];

		if (tt == MACROARG) {
			unsigned anum = m->cval.t[i];

			/* a first byte >= 128 announces a two-byte argument number:
			   its low 7 bits are the high part, the next byte the low
			   part */
			if (anum >= 128) anum = ((anum & 127U) << 8)
				| m->cval.t[++ i];
			/* the argument number equal to narg is printed as
			   __VA_ARGS__ */
			if (anum == (unsigned)m->narg)
				fputs("__VA_ARGS__", emit_output);
				fputs(m->arg[anum], emit_output);
			i ++;
		else if (S_TOKEN(tt)) {
			/* the token text is stored right after the type byte as a
			   NUL-terminated string; skip it including the NUL */
			fputs((char *)(m->cval.t + i), emit_output);
			i += 1 + strlen((char *)(m->cval.t + i));
		} else fputs(operators_name[tt], emit_output);
	fputc('\n', emit_output);
Пример #4
/*
 * Compare two token_fifo; return 0 if they are identical, 1 otherwise.
 * All whitespace tokens are considered identical, but sequences of
 * whitespace are not shrunk.
 */
/*
 * cmp_token_list: compare the token lists f1 and f2 position by position.
 *   f1, f2  the two lists to compare; neither is modified
 * Returns 1 at the first difference found; `return 0` is the final
 * statement.
 * NOTE(review): the brace lines delimiting the body and the loop are
 * not present in this excerpt.
 */
int cmp_token_list(struct token_fifo *f1, struct token_fifo *f2)
	size_t i;

	/* lists holding a different number of tokens are reported as different */
	if (f1->nt != f2->nt) return 1;
	for (i = 0; i < f1->nt; i ++) {
		/* two tokens that both satisfy ttMWS() count as equal */
		if (ttMWS(f1->t[i].type) && ttMWS(f2->t[i].type)) continue;
		if (f1->t[i].type != f2->t[i].type) return 1;
		/* MACROARG tokens are told apart by their line field */
		if (f1->t[i].type == MACROARG
			&& f1->t[i].line != f2->t[i].line) return 1;
		/* tokens whose type satisfies S_TOKEN() must also carry the
		   same name */
		if (S_TOKEN(f1->t[i].type)
			&& strcmp(f1->t[i].name, f2->t[i].name)) return 1;
	return 0;
Пример #5
/*
 * Send a token to the output at a given line (this is for text output
 * and unreplaced macros due to lack of arguments).
 */
/*
 * print_token_nailed: emit the token t, using nail_line (rather than
 * ls->line) as the line number the output is brought up to.
 *   ls         lexer state; its flags select the output path
 *   t          token to emit
 *   nail_line  line number compared against ls->oline
 * NOTE(review): the brace lines and the lines closing each branch are
 * not present in this excerpt, so whether the LEXER branch returns
 * before the character output below cannot be confirmed from here.
 */
static void print_token_nailed(struct lexer_state *ls, struct token *t,
	long nail_line)
	char *x = t->name;

	/* with the LEXER flag set the token goes through print_token(),
	   with a zero uz_line */
	if (ls->flags & LEXER) {
		print_token(ls, t, 0);
	if (ls->flags & KEEP_OUTPUT) {
		/* emit newlines while ls->oline is behind nail_line */
		for (; ls->oline < nail_line;) put_char(ls, '\n');
	/* a token whose type fails S_TOKEN() is spelled from operators_name[] */
	if (!S_TOKEN(t->type)) x = operators_name[t->type];
	for (; *x; x ++) put_char(ls, *x);
Пример #6
/*
 * for #unassert
 */
/*
 * handle_unassert: process a #unassert directive read from ls.
 *   ls  lexer state positioned after the directive name
 * ret starts at -1 and is set to 0 when the named assertion is not in
 * the table and again before the jump to handle_unassert_finish; the
 * name-only form returns 0 directly.
 * NOTE(review): the brace lines and every goto label
 * (handle_unassert_next, _next2, _next3, _trunc, _warp, _finish) are
 * not present in this excerpt, so the exact entry point of each section
 * below is inferred from the order of the code only.
 */
int handle_unassert(struct lexer_state *ls)
	int ltww;
	struct ucpp_token t;
	struct token_fifo atl;
	struct assert *a;
	int ret = -1;
	long l = ls->line;
	int nnp;
	size_t i;

	/* start with an empty token list */
	atl.art = atl.nt = 0;
	/* look for the assertion name, skipping tokens that satisfy ttMWS() */
	while (!next_token(ls)) {
		if (ls->ctok->type == NEWLINE) break;
		if (ttMWS(ls->ctok->type)) continue;
		if (ls->ctok->type == NAME) {
			/* a name that is not in the assertions table is not an
			   error: ret becomes 0 */
			if (!(a = HTT_get(&assertions, ls->ctok->name))) {
				ret = 0;
				goto handle_unassert_warp;
			goto handle_unassert_next;
		error(l, "illegal assertion name for #unassert");
		goto handle_unassert_warp;
	goto handle_unassert_trunc;

	/* after the name, the first token that is neither whitespace nor a
	   newline must be a left parenthesis */
	while (!next_token(ls)) {
		if (ls->ctok->type == NEWLINE) break;
		if (ttMWS(ls->ctok->type)) continue;
		if (ls->ctok->type != LPAR) {
			error(l, "syntax error in #unassert");
			goto handle_unassert_warp;
		goto handle_unassert_next2;
	/* presumably reached when the line ends right after the name: the
	   whole assertion is removed from the table -- verify */
	if (emit_assertions)
		fprintf(emit_output, "#unassert %s\n", HASH_ITEM_NAME(a));
	HTT_del(&assertions, HASH_ITEM_NAME(a));
	return 0;

	/* collect tokens into atl; nnp counts the parentheses still open
	   (starting at 1) and ltww is non-zero when the previous token was
	   whitespace, so whitespace following whitespace (and leading
	   whitespace, as ltww starts at 1) is skipped */
	for (nnp = 1, ltww = 1; nnp && !next_token(ls);) {
		if (ls->ctok->type == NEWLINE) break;
		if (ltww && ttMWS(ls->ctok->type)) continue;
		ltww = ttMWS(ls->ctok->type);
		if (ls->ctok->type == LPAR) nnp ++;
		else if (ls->ctok->type == RPAR) {
			if (!(-- nnp)) goto handle_unassert_next3;
		t.type = ls->ctok->type;
		/* tokens whose type satisfies S_TOKEN() get a duplicated name */
		if (S_TOKEN(t.type)) t.name = sdup(ls->ctok->name);
		aol(atl.t, atl.nt, t, TOKEN_LIST_MEMG);
	goto handle_unassert_trunc;

	/* read the rest of the line; with WARN_STANDARD set, a token that
	   fails ttWHI() triggers a warning */
	while (!next_token(ls) && ls->ctok->type != NEWLINE) {
		if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) {
			warning(l, "trailing garbage in #unassert");
	/* drop a trailing whitespace token from the collected list (the
	   statement controlled by this test is not visible here) */
	if (atl.nt && ttMWS(atl.t[atl.nt - 1].type) && (-- atl.nt) == 0)
	if (atl.nt == 0) {
		error(l, "void assertion in #unassert");
		return ret;
	/* search a->val for a list equal to atl; cmp_token_list() returns 0
	   on equality, which stops the loop */
	for (i = 0; i < a->nbval && cmp_token_list(&atl, a->val + i); i ++);
	if (i != a->nbval) {
		/* we have it, undefine it */
		del_token_fifo(a->val + i);
		/* move the following entries down over the removed one */
		if (i < (a->nbval - 1))
			mmvwo(a->val + i, a->val + i + 1, (a->nbval - i - 1)
				* sizeof(struct token_fifo));
		/* the array itself is freed once its last entry is gone */
		if ((-- a->nbval) == 0) freemem(a->val);
		if (emit_assertions) {
			fprintf(emit_output, "#unassert %s(",
			fputs(")\n", emit_output);
	ret = 0;
	goto handle_unassert_finish;

	/* error and cleanup tails; the labels that introduce them are not
	   visible in this excerpt */
	error(l, "unfinished #unassert");
	if (atl.nt) del_token_fifo(&atl);
	return ret;
	/* consume the remaining tokens of the line */
	while (!next_token(ls) && ls->ctok->type != NEWLINE);
	return ret;
Пример #7
/*
 * for #assert
 * Assertions are not part of the ISO-C89 standard, but they are sometimes
 * encountered, for instance in Solaris standard include files.
 */
/*
 * handle_assert: process a #assert directive read from ls.
 *   ls  lexer state positioned after the directive name
 * ret starts at -1; it is set to 0 when the value is already recorded
 * for the assertion, and 0 is returned directly once a new value has
 * been stored.
 * NOTE(review): the brace lines, every goto label (handle_assert_next,
 * _next2, _next3, _trunc, _error, _warp_ign) and the bodies of the
 * cleanup tests at the end are not present in this excerpt, so the
 * exact entry point of each section below is inferred from the order of
 * the code only.
 */
int handle_assert(struct lexer_state *ls)
	int ina = 0, ltww;
	struct ucpp_token t;
	struct token_fifo *atl = 0;
	struct assert *a;
	char *aname;
	int ret = -1;
	long l = ls->line;
	int nnp;
	size_t i;

	/* look for the assertion name, skipping tokens that satisfy ttMWS() */
	while (!next_token(ls)) {
		if (ls->ctok->type == NEWLINE) break;
		if (ttMWS(ls->ctok->type)) continue;
		if (ls->ctok->type == NAME) {
			/* a name that is not in the table yet: create the assertion
			   and remember, through ina, that it still has to be
			   inserted */
			if (!(a = HTT_get(&assertions, ls->ctok->name))) {
				a = new_assertion();
				aname = sdup(ls->ctok->name);
				ina = 1;
			goto handle_assert_next;
		error(l, "illegal assertion name for #assert");
		goto handle_assert_warp_ign;
	goto handle_assert_trunc;

	/* after the name, the first token that is neither whitespace nor a
	   newline must be a left parenthesis */
	while (!next_token(ls)) {
		if (ls->ctok->type == NEWLINE) break;
		if (ttMWS(ls->ctok->type)) continue;
		if (ls->ctok->type != LPAR) {
			error(l, "syntax error in #assert");
			goto handle_assert_warp_ign;
		goto handle_assert_next2;
	goto handle_assert_trunc;

	/* allocate an empty token list for the value being asserted */
	atl = getmem(sizeof(struct token_fifo));
	atl->art = atl->nt = 0;
	/* collect tokens into atl; nnp counts the parentheses still open
	   (starting at 1) and ltww is non-zero when the previous token was
	   whitespace, so whitespace following whitespace (and leading
	   whitespace, as ltww starts at 1) is skipped */
	for (nnp = 1, ltww = 1; nnp && !next_token(ls);) {
		if (ls->ctok->type == NEWLINE) break;
		if (ltww && ttMWS(ls->ctok->type)) continue;
		ltww = ttMWS(ls->ctok->type);
		if (ls->ctok->type == LPAR) nnp ++;
		else if (ls->ctok->type == RPAR) {
			if (!(-- nnp)) goto handle_assert_next3;
		t.type = ls->ctok->type;
		/* tokens whose type satisfies S_TOKEN() get a duplicated name */
		if (S_TOKEN(t.type)) t.name = sdup(ls->ctok->name);
		aol(atl->t, atl->nt, t, TOKEN_LIST_MEMG);
	goto handle_assert_trunc;

	/* read the rest of the line; with WARN_STANDARD set, a token that
	   fails ttWHI() triggers a warning */
	while (!next_token(ls) && ls->ctok->type != NEWLINE) {
		if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) {
			warning(l, "trailing garbage in #assert");
	/* drop a trailing whitespace token from the collected list (the
	   statement controlled by this test is not visible here) */
	if (atl->nt && ttMWS(atl->t[atl->nt - 1].type) && (-- atl->nt) == 0)
	if (atl->nt == 0) {
		error(l, "void assertion in #assert");
		goto handle_assert_error;
	/* search a->val for a list equal to atl; cmp_token_list() returns 0
	   on equality, which stops the loop */
	for (i = 0; i < a->nbval && cmp_token_list(atl, a->val + i); i ++);
	if (i != a->nbval) {
		/* we already have it */
		ret = 0;
		goto handle_assert_error;

	/* This is a new assertion. Let's keep it. */
	/* the list is appended by value (*atl) to a->val */
	aol(a->val, a->nbval, *atl, TOKEN_LIST_MEMG);
	/* a freshly created assertion is inserted under the duplicated name */
	if (ina) {
		HTT_put(&assertions, a, aname);
	if (emit_assertions) {
		fprintf(emit_output, "#assert %s(", HASH_ITEM_NAME(a));
		fputs(")\n", emit_output);
	return 0;

	/* error and cleanup tails; the labels that introduce them and the
	   statements controlled by the tests on atl and ina are not visible
	   in this excerpt */
	error(l, "unfinished #assert");
	if (atl) {
	if (ina) {
	return ret;
	/* consume the remaining tokens of the line */
	while (!next_token(ls) && ls->ctok->type != NEWLINE);
	if (ina) {
	return ret;
Пример #8
/*
 * We found a #define directive; parse the end of the line, perform
 * sanity checks, store the new macro into the "macros" hash table.
 * In case of a redefinition of a macro: we enforce the rule that a
 * macro should be redefined identically, including the spelling of
 * parameters. We emit an error on offending code; dura lex, sed lex.
 * After all, it is easy to avoid such problems, with a #undef directive.
 */
int handle_define(struct lexer_state *ls)
	struct macro *m = 0, *n;
	struct token_fifo mv;
	int ltwws = 1, redef = 0;
	char *mname = 0;
	int narg;
	size_t nt;
	long l = ls->line;

	mv.art = mv.nt = 0;
	mv.t = NULL;
	/* find the next non-white token on the line, this should be
	   the macro name */
	while (!next_token(ls) && ls->ctok->type != NEWLINE) {
		if (ttMWS(ls->ctok->type)) continue;
		if (ls->ctok->type == NAME) mname = sdup(ls->ctok->name);
	if (mname == 0) {
		error(l, "missing macro name");
		return 1;
	if (check_special_macro(mname)) {
		error(l, "trying to redefine the special macro %s", mname);
		goto warp_error;
	/*
	 * If a macro with this name was already defined: the K&R
	 * states that the new macro should be identical to the old one
	 * (with some arcane rule of equivalence of whitespace); otherwise,
	 * redefining the macro is an error. Most preprocessors would
	 * only emit a warning (or nothing at all) on an unidentical
	 * redefinition.
	 * Since it is easy to avoid this error (with a #undef directive),
	 * we choose to enforce the rule and emit an error.
	 */
	if ((n = HTT_get(&macros, mname)) != 0) {
		/* redefinition of a macro: we must check that we define
		   it identical */
		redef = 1;
		n->cval.rp = 0;
		mname = 0;
	if (!redef) {
		m = new_macro();
		m->narg = -1;
#define mval	mv
	if (next_token(ls)) goto define_end;
	/*
	 * Check if the token immediately following the macro name is
	 * a left parenthesis; if so, then this is a macro with arguments.
	 * Collect their names and try to match the next parenthesis.
	 */
	if (ls->ctok->type == LPAR) {
		int i, j;
		int need_comma = 0, saw_mdots = 0;

		narg = 0;
		while (!next_token(ls)) {
			if (ls->ctok->type == NEWLINE) {
				error(l, "truncated macro definition");
				goto define_error;
			if (ls->ctok->type == COMMA) {
				if (saw_mdots) {
					error(l, "'...' must end the macro "
						"argument list");
					goto warp_error;
				if (!need_comma) {
					error(l, "void macro argument");
					goto warp_error;
				need_comma = 0;
			} else if (ls->ctok->type == NAME) {
				if (saw_mdots) {
					error(l, "'...' must end the macro "
						"argument list");
					goto warp_error;
				if (need_comma) {
					error(l, "missing comma in "
						"macro argument list");
					goto warp_error;
				if (!redef) {
					aol(m->arg, narg,
						sdup(ls->ctok->name), 8);
					/* we must keep track of m->narg
					   so that cleanup in case of
					   error works. */
					m->narg = narg;
					if (narg == 128
						&& (ls->flags & WARN_STANDARD))
						warning(l, "more arguments to "
							"macro than the ISO "
							"limit (127)");
					if (narg == 32767) {
						error(l, "too many arguments "
							"in macro definition "
							"(max 32766)");
						goto warp_error;
				} else {
					/* this is a redefinition of the
					   macro; check equality between
					   old and new definitions */
					if (narg >= n->narg) goto redef_error;
					if (strcmp(ls->ctok->name,
						n->arg[narg ++]))
						goto redef_error;
				need_comma = 1;
			} else if ((ls->flags & MACRO_VAARG)
				&& ls->ctok->type == MDOTS) {
				if (need_comma) {
					error(l, "missing comma before '...'");
					goto warp_error;
				if (redef && !n->vaarg) goto redef_error;
				if (!redef) m->vaarg = 1;
				saw_mdots = 1;
				need_comma = 1;
			} else if (ls->ctok->type == RPAR) {
				if (narg > 0 && !need_comma) {
					error(l, "void macro argument");
					goto warp_error;
				if (redef && n->vaarg && !saw_mdots)
					goto redef_error;
			} else if (ttMWS(ls->ctok->type)) {
			error(l, "invalid macro argument");
			goto warp_error;
		if (!redef) {
			for (i = 1; i < narg; i ++) for (j = 0; j < i; j ++)
				if (!strcmp(m->arg[i], m->arg[j])) {
					error(l, "duplicate macro "
					goto warp_error;
		if (!redef) m->narg = narg;
	} else {
		if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD))
			warning(ls->line, "identifier not followed by "
				"whitespace in #define");
		ls->flags |= READ_AGAIN;
		narg = 0;
	if (redef) nt = 0;

	/* now, we have the arguments. Let's get the macro contents. */
	while (!next_token(ls) && ls->ctok->type != NEWLINE) {
		struct token t;

		t.type = ls->ctok->type;
		if (ltwws && ttMWS(t.type)) continue;
		t.line = 0;
		if (t.type == NAME) {
			int i;

			if ((ls->flags & MACRO_VAARG)
				&& !strcmp(ls->ctok->name, "__VA_ARGS__")) {
				if (redef) {
					if (!n->vaarg) goto redef_error;
				} else if (!m->vaarg) {
					error(l, "'__VA_ARGS__' is forbidden "
						"in macros with a fixed "
						"number of arguments");
					goto warp_error;
				t.type = MACROARG;
				t.line = redef ? n->narg : m->narg;
			for (i = 0; i < narg; i ++)
				if (!strcmp(redef ? n->arg[i] : m->arg[i],
					ls->ctok->name)) {
					t.type = MACROARG;
					/* this is a hack: we store the
					   argument number in the line field */
					t.line = i;
		if (!redef && S_TOKEN(t.type)) t.name = sdup(ls->ctok->name);
		if (ttMWS(t.type)) {
			if (ltwws) continue;
			t.type = OPT_NONE;
			t.type = NONE;
			ltwws = 1;
		} else ltwws = 0;
		if (!redef) {
			/* we ensure that each macro token has a correct
			   line number */
			if (t.type != MACROARG) t.line = 1;
			aol(mval.t, mval.nt, t, TOKEN_LIST_MEMG);
		} else {
			int tt;

			if (n->cval.rp >= n->cval.length) {
				if (t.type != OPT_NONE) goto redef_error;
				if (t.type != NONE) goto redef_error;
			} else if (t.type != n->cval.t[n->cval.rp]) {
				goto redef_error;
			} else if (t.type == MACROARG) {
				unsigned anum = n->cval.t[n->cval.rp + 1];

				if (anum >= 128U) anum = ((anum & 127U) << 8)
					| m->cval.t[n->cval.rp + 2];
				if (anum != (unsigned)t.line) goto redef_error;
			} else if (S_TOKEN(t.type) && strcmp(ls->ctok->name,
				   (char *)(n->cval.t + n->cval.rp + 1))) {
				goto redef_error;
			tt = n->cval.t[n->cval.rp ++];
			if (S_TOKEN(tt)) n->cval.rp += 1
				+ strlen((char *)(n->cval.t + n->cval.rp));
			else if (tt == MACROARG) {
				if (n->cval.t[++ n->cval.rp] >= 128)
					n->cval.rp ++;
			nt ++;

	if (redef) {
		if (n->cval.rp < n->cval.length) goto redef_error_2;
		return 0;

	/* now we have the complete macro; perform some checks about
	   the operators # and ##, and, if everything is ok,
	   store the macro into the hash table */
	if (mval.nt && mval.t[mval.nt - 1].type == OPT_NONE) {
	if (mval.nt && mval.t[mval.nt - 1].type == NONE) {
		mval.nt --;
		if (mval.nt == 0) freemem(mval.t);
	if (mval.nt != 0) {
		/* some checks about the macro */
		if (mval.t[0].type == DSHARP
			|| mval.t[0].type == DIG_DSHARP
			|| mval.t[mval.nt - 1].type == DSHARP
			|| mval.t[mval.nt - 1].type == DIG_DSHARP) {
			error(l, "operator '##' may neither begin "
				"nor end a macro");
			goto define_error;
		if (m->narg >= 0) {
			size_t i;
			for (i = 0; i < mval.nt; i ++)
				if ((mval.t[i].type == SHARP
					|| mval.t[i].type == DIG_SHARP) &&
					(i == (mval.nt - 1)
					|| (ttMWS(mval.t[i + 1].type) &&
					    (i == mval.nt - 2
					     || mval.t[i + 2].type != MACROARG))
					|| (!ttMWS(mval.t[i + 1].type)
					     && mval.t[i + 1].type != MACROARG))) {
					error(l, "operator '#' not followed "
						"by a macro argument");
					goto define_error;
		size_t i, l;

		for (i = 0, l = 0; i < mval.nt; i ++) {
			l ++;
			if (S_TOKEN(mval.t[i].type))
				l += 1 + strlen(mval.t[i].name);
			else if (mval.t[i].type == MACROARG) {
				l ++;
				if (mval.t[i].line >= 128) l ++;
		m->cval.length = l;
		if (l) m->cval.t = getmem(l);
		for (i = 0, l = 0; i < mval.nt; i ++) {
			m->cval.t[l ++] = mval.t[i].type;
			if (S_TOKEN(mval.t[i].type)) {
				size_t x = 1 + strlen(mval.t[i].name);

				mmv(m->cval.t + l, mval.t[i].name, x);
				l += x;
			else if (mval.t[i].type == MACROARG) {
				unsigned anum = mval.t[i].line;

				if (anum >= 128) {
					m->cval.t[l ++] = 128 | (anum >> 8);
					m->cval.t[l ++] = anum & 0xFF;
				} else {
					m->cval.t[l ++] = anum;