/*
 * Release what a token_fifo owns: the name of every token whose type
 * satisfies S_TOKEN(), then the token array itself when the fifo is not
 * empty. The token_fifo structure itself is left to the caller.
 */
static void del_token_fifo(struct token_fifo *tf)
{
	size_t idx = 0;

	while (idx < tf->nt) {
		if (S_TOKEN(tf->t[idx].type)) {
			freemem(tf->t[idx].name);
		}
		idx ++;
	}

	/* the array is only released when the fifo holds at least one token */
	if (tf->nt != 0) {
		freemem(tf->t);
	}
}
/* * Send a token to the output (a token_fifo in lexer mode, the output * buffer in stand alone mode). */ void print_token(struct lexer_state *ls, struct token *t, long uz_line) { char *x = t->name; if (uz_line && t->line < 0) t->line = uz_line; if (ls->flags & LEXER) { struct token at; at = *t; if (S_TOKEN(t->type)) { at.name = sdup(at.name); throw_away(ls->gf, at.name); } aol(ls->output_fifo->t, ls->output_fifo->nt, at, TOKEN_LIST_MEMG); return; } if (ls->flags & KEEP_OUTPUT) { for (; ls->oline < ls->line;) put_char(ls, '\n'); } if (!S_TOKEN(t->type)) x = operators_name[t->type]; for (; *x; x ++) put_char(ls, *x); }
/* * print the content of a macro, in #define form */ static void print_macro(void *vm) { struct macro *m = vm; char *mname = HASH_ITEM_NAME(m); int x = check_special_macro(mname); size_t i; if (x != MAC_NONE) { fprintf(emit_output, "/* #define %s */ /* special */\n", mname); return; } fprintf(emit_output, "#define %s", mname); if (m->narg >= 0) { fprintf(emit_output, "("); for (i = 0; i < (size_t)(m->narg); i ++) { fprintf(emit_output, i ? ", %s" : "%s", m->arg[i]); } if (m->vaarg) { fputs(m->narg ? ", ..." : "...", emit_output); } fprintf(emit_output, ")"); } if (m->cval.length == 0) { fputc('\n', emit_output); return; } fputc(' ', emit_output); for (i = 0; i < m->cval.length;) { int tt = m->cval.t[i ++]; if (tt == MACROARG) { unsigned anum = m->cval.t[i]; if (anum >= 128) anum = ((anum & 127U) << 8) | m->cval.t[++ i]; if (anum == (unsigned)m->narg) fputs("__VA_ARGS__", emit_output); else fputs(m->arg[anum], emit_output); i ++; } else if (S_TOKEN(tt)) { fputs((char *)(m->cval.t + i), emit_output); i += 1 + strlen((char *)(m->cval.t + i)); } else fputs(operators_name[tt], emit_output); } fputc('\n', emit_output); }
/* * compare two token_fifo, return 0 if they are identical, 1 otherwise. * All whitespace tokens are considered identical, but sequences of * whitespace are not shrinked. */ int cmp_token_list(struct token_fifo *f1, struct token_fifo *f2) { size_t i; if (f1->nt != f2->nt) return 1; for (i = 0; i < f1->nt; i ++) { if (ttMWS(f1->t[i].type) && ttMWS(f2->t[i].type)) continue; if (f1->t[i].type != f2->t[i].type) return 1; if (f1->t[i].type == MACROARG && f1->t[i].line != f2->t[i].line) return 1; if (S_TOKEN(f1->t[i].type) && strcmp(f1->t[i].name, f2->t[i].name)) return 1; } return 0; }
/* * Send a token to the output at a given line (this is for text output * and unreplaced macros due to lack of arguments). */ static void print_token_nailed(struct lexer_state *ls, struct token *t, long nail_line) { char *x = t->name; if (ls->flags & LEXER) { print_token(ls, t, 0); return; } if (ls->flags & KEEP_OUTPUT) { for (; ls->oline < nail_line;) put_char(ls, '\n'); } if (!S_TOKEN(t->type)) x = operators_name[t->type]; for (; *x; x ++) put_char(ls, *x); }
/* * for #unassert */ int handle_unassert(struct lexer_state *ls) { int ltww; struct ucpp_token t; struct token_fifo atl; struct assert *a; int ret = -1; long l = ls->line; int nnp; size_t i; atl.art = atl.nt = 0; while (!next_token(ls)) { if (ls->ctok->type == NEWLINE) break; if (ttMWS(ls->ctok->type)) continue; if (ls->ctok->type == NAME) { if (!(a = HTT_get(&assertions, ls->ctok->name))) { ret = 0; goto handle_unassert_warp; } goto handle_unassert_next; } error(l, "illegal assertion name for #unassert"); goto handle_unassert_warp; } goto handle_unassert_trunc; handle_unassert_next: while (!next_token(ls)) { if (ls->ctok->type == NEWLINE) break; if (ttMWS(ls->ctok->type)) continue; if (ls->ctok->type != LPAR) { error(l, "syntax error in #unassert"); goto handle_unassert_warp; } goto handle_unassert_next2; } if (emit_assertions) fprintf(emit_output, "#unassert %s\n", HASH_ITEM_NAME(a)); HTT_del(&assertions, HASH_ITEM_NAME(a)); return 0; handle_unassert_next2: for (nnp = 1, ltww = 1; nnp && !next_token(ls);) { if (ls->ctok->type == NEWLINE) break; if (ltww && ttMWS(ls->ctok->type)) continue; ltww = ttMWS(ls->ctok->type); if (ls->ctok->type == LPAR) nnp ++; else if (ls->ctok->type == RPAR) { if (!(-- nnp)) goto handle_unassert_next3; } t.type = ls->ctok->type; if (S_TOKEN(t.type)) t.name = sdup(ls->ctok->name); aol(atl.t, atl.nt, t, TOKEN_LIST_MEMG); } goto handle_unassert_trunc; handle_unassert_next3: while (!next_token(ls) && ls->ctok->type != NEWLINE) { if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) { warning(l, "trailing garbage in #unassert"); } } if (atl.nt && ttMWS(atl.t[atl.nt - 1].type) && (-- atl.nt) == 0) freemem(atl.t); if (atl.nt == 0) { error(l, "void assertion in #unassert"); return ret; } for (i = 0; i < a->nbval && cmp_token_list(&atl, a->val + i); i ++); if (i != a->nbval) { /* we have it, undefine it */ del_token_fifo(a->val + i); if (i < (a->nbval - 1)) mmvwo(a->val + i, a->val + i + 1, (a->nbval - i - 1) * sizeof(struct token_fifo)); 
if ((-- a->nbval) == 0) freemem(a->val); if (emit_assertions) { fprintf(emit_output, "#unassert %s(", HASH_ITEM_NAME(a)); print_token_fifo(&atl); fputs(")\n", emit_output); } } ret = 0; goto handle_unassert_finish; handle_unassert_trunc: error(l, "unfinished #unassert"); handle_unassert_finish: if (atl.nt) del_token_fifo(&atl); return ret; handle_unassert_warp: while (!next_token(ls) && ls->ctok->type != NEWLINE); return ret; }
/* * for #assert * Assertions are not part of the ISO-C89 standard, but they are sometimes * encountered, for instance in Solaris standard include files. */ int handle_assert(struct lexer_state *ls) { int ina = 0, ltww; struct ucpp_token t; struct token_fifo *atl = 0; struct assert *a; char *aname; int ret = -1; long l = ls->line; int nnp; size_t i; while (!next_token(ls)) { if (ls->ctok->type == NEWLINE) break; if (ttMWS(ls->ctok->type)) continue; if (ls->ctok->type == NAME) { if (!(a = HTT_get(&assertions, ls->ctok->name))) { a = new_assertion(); aname = sdup(ls->ctok->name); ina = 1; } goto handle_assert_next; } error(l, "illegal assertion name for #assert"); goto handle_assert_warp_ign; } goto handle_assert_trunc; handle_assert_next: while (!next_token(ls)) { if (ls->ctok->type == NEWLINE) break; if (ttMWS(ls->ctok->type)) continue; if (ls->ctok->type != LPAR) { error(l, "syntax error in #assert"); goto handle_assert_warp_ign; } goto handle_assert_next2; } goto handle_assert_trunc; handle_assert_next2: atl = getmem(sizeof(struct token_fifo)); atl->art = atl->nt = 0; for (nnp = 1, ltww = 1; nnp && !next_token(ls);) { if (ls->ctok->type == NEWLINE) break; if (ltww && ttMWS(ls->ctok->type)) continue; ltww = ttMWS(ls->ctok->type); if (ls->ctok->type == LPAR) nnp ++; else if (ls->ctok->type == RPAR) { if (!(-- nnp)) goto handle_assert_next3; } t.type = ls->ctok->type; if (S_TOKEN(t.type)) t.name = sdup(ls->ctok->name); aol(atl->t, atl->nt, t, TOKEN_LIST_MEMG); } goto handle_assert_trunc; handle_assert_next3: while (!next_token(ls) && ls->ctok->type != NEWLINE) { if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) { warning(l, "trailing garbage in #assert"); } } if (atl->nt && ttMWS(atl->t[atl->nt - 1].type) && (-- atl->nt) == 0) freemem(atl->t); if (atl->nt == 0) { error(l, "void assertion in #assert"); goto handle_assert_error; } for (i = 0; i < a->nbval && cmp_token_list(atl, a->val + i); i ++); if (i != a->nbval) { /* we already have it */ ret = 0; goto 
handle_assert_error; } /* This is a new assertion. Let's keep it. */ aol(a->val, a->nbval, *atl, TOKEN_LIST_MEMG); if (ina) { HTT_put(&assertions, a, aname); freemem(aname); } if (emit_assertions) { fprintf(emit_output, "#assert %s(", HASH_ITEM_NAME(a)); print_token_fifo(atl); fputs(")\n", emit_output); } freemem(atl); return 0; handle_assert_trunc: error(l, "unfinished #assert"); handle_assert_error: if (atl) { del_token_fifo(atl); freemem(atl); } if (ina) { freemem(aname); freemem(a); } return ret; handle_assert_warp_ign: while (!next_token(ls) && ls->ctok->type != NEWLINE); if (ina) { freemem(aname); freemem(a); } return ret; }
/* * We found a #define directive; parse the end of the line, perform * sanity checks, store the new macro into the "macros" hash table. * * In case of a redefinition of a macro: we enforce the rule that a * macro should be redefined identically, including the spelling of * parameters. We emit an error on offending code; dura lex, sed lex. * After all, it is easy to avoid such problems, with a #undef directive. */ int handle_define(struct lexer_state *ls) { struct macro *m = 0, *n; struct token_fifo mv; int ltwws = 1, redef = 0; char *mname = 0; int narg; size_t nt; long l = ls->line; mv.art = mv.nt = 0; mv.t = NULL; /* find the next non-white token on the line, this should be the macro name */ while (!next_token(ls) && ls->ctok->type != NEWLINE) { if (ttMWS(ls->ctok->type)) continue; if (ls->ctok->type == NAME) mname = sdup(ls->ctok->name); break; } if (mname == 0) { error(l, "missing macro name"); return 1; } if (check_special_macro(mname)) { error(l, "trying to redefine the special macro %s", mname); goto warp_error; } /* * If a macro with this name was already defined: the K&R * states that the new macro should be identical to the old one * (with some arcane rule of equivalence of whitespace); otherwise, * redefining the macro is an error. Most preprocessors would * only emit a warning (or nothing at all) on an unidentical * redefinition. * * Since it is easy to avoid this error (with a #undef directive), * we choose to enforce the rule and emit an error. */ if ((n = HTT_get(¯os, mname)) != 0) { /* redefinition of a macro: we must check that we define it identical */ redef = 1; n->cval.rp = 0; freemem(mname); mname = 0; } if (!redef) { m = new_macro(); m->narg = -1; #define mval mv } if (next_token(ls)) goto define_end; /* * Check if the token immediately following the macro name is * a left parenthesis; if so, then this is a macro with arguments. * Collect their names and try to match the next parenthesis. 
*/ if (ls->ctok->type == LPAR) { int i, j; int need_comma = 0, saw_mdots = 0; narg = 0; while (!next_token(ls)) { if (ls->ctok->type == NEWLINE) { error(l, "truncated macro definition"); goto define_error; } if (ls->ctok->type == COMMA) { if (saw_mdots) { error(l, "'...' must end the macro " "argument list"); goto warp_error; } if (!need_comma) { error(l, "void macro argument"); goto warp_error; } need_comma = 0; continue; } else if (ls->ctok->type == NAME) { if (saw_mdots) { error(l, "'...' must end the macro " "argument list"); goto warp_error; } if (need_comma) { error(l, "missing comma in " "macro argument list"); goto warp_error; } if (!redef) { aol(m->arg, narg, sdup(ls->ctok->name), 8); /* we must keep track of m->narg so that cleanup in case of error works. */ m->narg = narg; if (narg == 128 && (ls->flags & WARN_STANDARD)) warning(l, "more arguments to " "macro than the ISO " "limit (127)"); if (narg == 32767) { error(l, "too many arguments " "in macro definition " "(max 32766)"); goto warp_error; } } else { /* this is a redefinition of the macro; check equality between old and new definitions */ if (narg >= n->narg) goto redef_error; if (strcmp(ls->ctok->name, n->arg[narg ++])) goto redef_error; } need_comma = 1; continue; } else if ((ls->flags & MACRO_VAARG) && ls->ctok->type == MDOTS) { if (need_comma) { error(l, "missing comma before '...'"); goto warp_error; } if (redef && !n->vaarg) goto redef_error; if (!redef) m->vaarg = 1; saw_mdots = 1; need_comma = 1; continue; } else if (ls->ctok->type == RPAR) { if (narg > 0 && !need_comma) { error(l, "void macro argument"); goto warp_error; } if (redef && n->vaarg && !saw_mdots) goto redef_error; break; } else if (ttMWS(ls->ctok->type)) { continue; } error(l, "invalid macro argument"); goto warp_error; } if (!redef) { for (i = 1; i < narg; i ++) for (j = 0; j < i; j ++) if (!strcmp(m->arg[i], m->arg[j])) { error(l, "duplicate macro " "argument"); goto warp_error; } } if (!redef) m->narg = narg; } else { if 
(!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) warning(ls->line, "identifier not followed by " "whitespace in #define"); ls->flags |= READ_AGAIN; narg = 0; } if (redef) nt = 0; /* now, we have the arguments. Let's get the macro contents. */ while (!next_token(ls) && ls->ctok->type != NEWLINE) { struct token t; t.type = ls->ctok->type; if (ltwws && ttMWS(t.type)) continue; t.line = 0; if (t.type == NAME) { int i; if ((ls->flags & MACRO_VAARG) && !strcmp(ls->ctok->name, "__VA_ARGS__")) { if (redef) { if (!n->vaarg) goto redef_error; } else if (!m->vaarg) { error(l, "'__VA_ARGS__' is forbidden " "in macros with a fixed " "number of arguments"); goto warp_error; } t.type = MACROARG; t.line = redef ? n->narg : m->narg; } for (i = 0; i < narg; i ++) if (!strcmp(redef ? n->arg[i] : m->arg[i], ls->ctok->name)) { t.type = MACROARG; /* this is a hack: we store the argument number in the line field */ t.line = i; break; } } if (!redef && S_TOKEN(t.type)) t.name = sdup(ls->ctok->name); if (ttMWS(t.type)) { if (ltwws) continue; #ifdef SEMPER_FIDELIS t.type = OPT_NONE; #else t.type = NONE; #endif ltwws = 1; } else ltwws = 0; if (!redef) { /* we ensure that each macro token has a correct line number */ if (t.type != MACROARG) t.line = 1; aol(mval.t, mval.nt, t, TOKEN_LIST_MEMG); } else { int tt; if (n->cval.rp >= n->cval.length) { #ifdef SEMPER_FIDELIS if (t.type != OPT_NONE) goto redef_error; #else if (t.type != NONE) goto redef_error; #endif } else if (t.type != n->cval.t[n->cval.rp]) { goto redef_error; } else if (t.type == MACROARG) { unsigned anum = n->cval.t[n->cval.rp + 1]; if (anum >= 128U) anum = ((anum & 127U) << 8) | m->cval.t[n->cval.rp + 2]; if (anum != (unsigned)t.line) goto redef_error; } else if (S_TOKEN(t.type) && strcmp(ls->ctok->name, (char *)(n->cval.t + n->cval.rp + 1))) { goto redef_error; } tt = n->cval.t[n->cval.rp ++]; if (S_TOKEN(tt)) n->cval.rp += 1 + strlen((char *)(n->cval.t + n->cval.rp)); else if (tt == MACROARG) { if (n->cval.t[++ 
n->cval.rp] >= 128) n->cval.rp ++; } nt ++; } } if (redef) { if (n->cval.rp < n->cval.length) goto redef_error_2; return 0; } /* now we have the complete macro; perform some checks about the operators # and ##, and, if everything is ok, store the macro into the hash table */ define_end: #ifdef SEMPER_FIDELIS if (mval.nt && mval.t[mval.nt - 1].type == OPT_NONE) { #else if (mval.nt && mval.t[mval.nt - 1].type == NONE) { #endif mval.nt --; if (mval.nt == 0) freemem(mval.t); } if (mval.nt != 0) { /* some checks about the macro */ if (mval.t[0].type == DSHARP || mval.t[0].type == DIG_DSHARP || mval.t[mval.nt - 1].type == DSHARP || mval.t[mval.nt - 1].type == DIG_DSHARP) { error(l, "operator '##' may neither begin " "nor end a macro"); goto define_error; } if (m->narg >= 0) { size_t i; for (i = 0; i < mval.nt; i ++) if ((mval.t[i].type == SHARP || mval.t[i].type == DIG_SHARP) && (i == (mval.nt - 1) || (ttMWS(mval.t[i + 1].type) && (i == mval.nt - 2 || mval.t[i + 2].type != MACROARG)) || (!ttMWS(mval.t[i + 1].type) && mval.t[i + 1].type != MACROARG))) { error(l, "operator '#' not followed " "by a macro argument"); goto define_error; } } } { size_t i, l; for (i = 0, l = 0; i < mval.nt; i ++) { l ++; if (S_TOKEN(mval.t[i].type)) l += 1 + strlen(mval.t[i].name); else if (mval.t[i].type == MACROARG) { l ++; if (mval.t[i].line >= 128) l ++; } } m->cval.length = l; if (l) m->cval.t = getmem(l); for (i = 0, l = 0; i < mval.nt; i ++) { m->cval.t[l ++] = mval.t[i].type; if (S_TOKEN(mval.t[i].type)) { size_t x = 1 + strlen(mval.t[i].name); mmv(m->cval.t + l, mval.t[i].name, x); l += x; freemem(mval.t[i].name); } else if (mval.t[i].type == MACROARG) { unsigned anum = mval.t[i].line; if (anum >= 128) { m->cval.t[l ++] = 128 | (anum >> 8); m->cval.t[l ++] = anum & 0xFF; } else { m->cval.t[l ++] = anum; } } }