/*
 * Dispatch a single preprocessor directive.
 *
 * Each known directive name is probed in turn with read_if(); the first
 * one that matches has its handler invoked and the function returns.
 * If none matches, the next preprocessor token is read: a newline is the
 * null directive (C11 6.10.7) and is silently accepted, anything else is
 * reported through error_token() as an unsupported directive.
 */
static void read_directive(CppContext *ctx) {
    Token *tok;

    if (read_if(ctx, "define")) {
        read_define(ctx);
        return;
    }
    if (read_if(ctx, "undef")) {
        read_undef(ctx);
        return;
    }

    /* Conditional inclusion family. */
    if (read_if(ctx, "if")) {
        handle_cond_incl(ctx, COND_IF);
        return;
    }
    if (read_if(ctx, "elif")) {
        handle_cond_incl(ctx, COND_ELIF);
        return;
    }
    if (read_if(ctx, "else")) {
        handle_cond_incl(ctx, COND_ELSE);
        return;
    }
    if (read_if(ctx, "ifdef")) {
        handle_cond_incl(ctx, COND_IFDEF);
        return;
    }
    if (read_if(ctx, "ifndef")) {
        handle_cond_incl(ctx, COND_IFNDEF);
        return;
    }
    if (read_if(ctx, "endif")) {
        handle_cond_incl(ctx, COND_ENDIF);
        return;
    }

    if (read_if(ctx, "include")) {
        handle_include(ctx);
        return;
    }
    if (read_if(ctx, "line")) {
        handle_line_directive(ctx);
        return;
    }
    if (read_if(ctx, "pragma")) {
        handle_pragma(ctx);
        return;
    }

    /* #error needs the matched token itself, so keep read_if()'s result. */
    tok = read_if(ctx, "error");
    if (tok) {
        read_error_directive(ctx, tok);
        return;
    }

    /* Nothing matched: either the null directive or an unknown one. */
    tok = read_cpp_token(ctx);
    if (tok && tok->toktype == TOKTYPE_NEWLINE) {
        /* 6.10.7 NULL directive.  Do nothing. */
        return;
    }
    error_token(tok, "unsupported preprocessor directive: '%s'", token_to_string(tok));
}
/*
 * The state machine looks for (approximately) these Perl regular expressions:
 *
 *    m|\/\*.*?\*\/|
 *    m|\/\/.*|
 *    m|'.*?'|
 *    m|".*?"|
 *    m|#\s*include\s*"(.*?)"|
 *    m|#\s*include\s*<(.*?)>|
 *
 * About 98% of the CPU time is spent here, and most of that is in
 * the 'start' paragraph.  Because the current characters are
 * in a register, the start loop usually eats 4 or 8 characters
 * per memory read.  The MAX4 and MIN4 tests dispose of most
 * input characters with 1 or 2 comparisons.
 *
 * Parameters:
 *   map - start of the text to scan; 'next' is initialised from it.
 *   end - not referenced directly in this body; presumably GETNEXT uses it
 *         to detect end of input and leave the for(;;) loop -- confirm
 *         against the macro definition.
 *
 * GETNEXT, CASE, NOTCASE, MAX4, MIN4 and 'current' are macros defined
 * elsewhere.  From their use here, CASE(c, label) appears to jump to
 * 'label' when the current character equals c, and NOTCASE(c, label) when
 * it differs -- NOTE(review): confirm against the definitions.
 *
 * Labels with a leading "__" re-examine the character already fetched;
 * their un-prefixed twins fetch a new character first.
 *
 * For every include found, handle_include() is called with 0 (quoted form)
 * or 1 (angle-bracket form), the pointer saved in map_dot, and the length
 * next - map_dot - 1.
 */
void state_machine(const char * map, const char * end)
{
	const char * next = map;
	const char * map_dot;
	unsigned long __buf = 0;

	for (;;) {
/* Idle state: skip characters until one that can start a construct. */
start:
	GETNEXT
__start:
	/* Cheap range test first: most characters are outside [MIN4, MAX4]. */
	if (current > MAX4('/','\'','"','#')) goto start;
	if (current < MIN4('/','\'','"','#')) goto start;
	CASE('/',  slash);
	CASE('\'', squote);
	CASE('"',  dquote);
	CASE('#',  pound);
	goto start;

/* // -- line comment: runs to newline; a backslash skips the next char */
slash_slash:
	GETNEXT
	CASE('\n', start);
	NOTCASE('\\', slash_slash);
	GETNEXT
	goto slash_slash;

/* / -- may begin a line comment or a block comment */
slash:
	GETNEXT
	CASE('/',  slash_slash);
	NOTCASE('*', __start);
/* inside a block comment: look for the closing star-slash */
slash_star_dot_star:
	GETNEXT
__slash_star_dot_star:
	NOTCASE('*', slash_star_dot_star);
	GETNEXT
	/* Re-test the same character so that "**" followed by '/' still closes. */
	NOTCASE('/', __slash_star_dot_star);
	goto start;

/* '.*?' -- character constant; a backslash skips the next char */
squote:
	GETNEXT
	CASE('\'', start);
	NOTCASE('\\', squote);
	GETNEXT
	goto squote;

/* ".*?" -- string literal; a backslash skips the next char */
dquote:
	GETNEXT
	CASE('"', start);
	NOTCASE('\\', dquote);
	GETNEXT
	goto dquote;

/* #\s* -- after '#', skip blanks and tabs, then expect 'i' */
pound:
	GETNEXT
	CASE(' ',  pound);
	CASE('\t', pound);
	CASE('i',  pound_i);
	goto __start;

/* #\s*i -- match the remaining letters of "include" one at a time */
pound_i:
	GETNEXT NOTCASE('n', __start);
	GETNEXT NOTCASE('c', __start);
	GETNEXT NOTCASE('l', __start);
	GETNEXT NOTCASE('u', __start);
	GETNEXT NOTCASE('d', __start);
	GETNEXT NOTCASE('e', __start);
	goto pound_include;

/* #\s*include\s* -- skip blanks, then expect an opening '"' or '<' */
pound_include:
	GETNEXT
	CASE(' ',  pound_include);
	CASE('\t', pound_include);
	/* Remember where the file name starts (position after the delimiter). */
	map_dot = next;
	CASE('"',  pound_include_dquote);
	CASE('<',  pound_include_langle);
	goto __start;

/* #\s*include\s*"(.*)" -- a newline before the closing quote abandons it */
pound_include_dquote:
	GETNEXT
	CASE('\n', start);
	NOTCASE('"', pound_include_dquote);
	handle_include(0, map_dot, next - map_dot - 1);
	goto start;

/* #\s*include\s*<(.*)> -- a newline before the closing '>' abandons it */
pound_include_langle:
	GETNEXT
	CASE('\n', start);
	NOTCASE('>', pound_include_langle);
	handle_include(1, map_dot, next - map_dot - 1);
	goto start;

	}
}
/* * The state machine looks for (approximately) these Perl regular expressions: * * m|\/\*.*?\*\/| * m|\/\/.*| * m|'.*?'| * m|".*?"| * m|#\s*include\s*"(.*?)"| * m|#\s*include\s*<(.*?>"| * m|#\s*(?define|undef)\s*CONFIG_(\w*)| * m|(?!\w)CONFIG_| * * About 98% of the CPU time is spent here, and most of that is in * the 'start' paragraph. Because the current characters are * in a register, the start loop usually eats 4 or 8 characters * per memory read. The MAX5 and MIN5 tests dispose of most * input characters with 1 or 2 comparisons. */ void state_machine(const char * map, const char * end) { const char * next = map; const char * map_dot; unsigned long __buf = 0; for (;;) { start: GETNEXT __start: if (current > MAX5('/','\'','"','#','C')) goto start; if (current < MIN5('/','\'','"','#','C')) goto start; CASE('/', slash); CASE('\'', squote); CASE('"', dquote); CASE('#', pound); CASE('C', cee); goto start; /* // */ slash_slash: GETNEXT CASE('\n', start); NOTCASE('\\', slash_slash); GETNEXT goto slash_slash; /* / */ slash: GETNEXT CASE('/', slash_slash); NOTCASE('*', __start); slash_star_dot_star: GETNEXT __slash_star_dot_star: NOTCASE('*', slash_star_dot_star); GETNEXT NOTCASE('/', __slash_star_dot_star); goto start; /* '.*?' */ squote: GETNEXT CASE('\'', start); NOTCASE('\\', squote); GETNEXT goto squote; /* ".*?" 
*/ dquote: GETNEXT CASE('"', start); NOTCASE('\\', dquote); GETNEXT goto dquote; /* #\s* */ pound: GETNEXT CASE(' ', pound); CASE('\t', pound); CASE('i', pound_i); CASE('d', pound_d); CASE('u', pound_u); goto __start; /* #\s*i */ pound_i: GETNEXT NOTCASE('n', __start); GETNEXT NOTCASE('c', __start); GETNEXT NOTCASE('l', __start); GETNEXT NOTCASE('u', __start); GETNEXT NOTCASE('d', __start); GETNEXT NOTCASE('e', __start); goto pound_include; /* #\s*include\s* */ pound_include: GETNEXT CASE(' ', pound_include); CASE('\t', pound_include); map_dot = next; CASE('"', pound_include_dquote); CASE('<', pound_include_langle); goto __start; /* #\s*include\s*"(.*)" */ pound_include_dquote: GETNEXT CASE('\n', start); NOTCASE('"', pound_include_dquote); handle_include(0, map_dot, next - map_dot - 1); goto start; /* #\s*include\s*<(.*)> */ pound_include_langle: GETNEXT CASE('\n', start); NOTCASE('>', pound_include_langle); handle_include(1, map_dot, next - map_dot - 1); goto start; /* #\s*d */ pound_d: GETNEXT NOTCASE('e', __start); GETNEXT NOTCASE('f', __start); GETNEXT NOTCASE('i', __start); GETNEXT NOTCASE('n', __start); GETNEXT NOTCASE('e', __start); goto pound_define_undef; /* #\s*u */ pound_u: GETNEXT NOTCASE('n', __start); GETNEXT NOTCASE('d', __start); GETNEXT NOTCASE('e', __start); GETNEXT NOTCASE('f', __start); goto pound_define_undef; /* * #\s*(define|undef)\s*CONFIG_(\w*) * * this does not define the word, because it could be inside another * conditional (#if 0). But I do parse the word so that this instance * does not count as a use. 
-- mec */ pound_define_undef: GETNEXT CASE(' ', pound_define_undef); CASE('\t', pound_define_undef); NOTCASE('C', __start); GETNEXT NOTCASE('O', __start); GETNEXT NOTCASE('N', __start); GETNEXT NOTCASE('F', __start); GETNEXT NOTCASE('I', __start); GETNEXT NOTCASE('G', __start); GETNEXT NOTCASE('_', __start); map_dot = next; pound_define_undef_CONFIG_word: GETNEXT if (isalnum(current) || current == '_') goto pound_define_undef_CONFIG_word; goto __start; /* \<CONFIG_(\w*) */ cee: if (next >= map+2 && (isalnum(next[-2]) || next[-2] == '_')) goto start; GETNEXT NOTCASE('O', __start); GETNEXT NOTCASE('N', __start); GETNEXT NOTCASE('F', __start); GETNEXT NOTCASE('I', __start); GETNEXT NOTCASE('G', __start); GETNEXT NOTCASE('_', __start); map_dot = next; cee_CONFIG_word: GETNEXT if (isalnum(current) || current == '_') goto cee_CONFIG_word; use_config(map_dot, next - map_dot - 1); goto __start; } }
/*
 * Core lexer: read characters with mygetc() and return the next token.
 *
 * Returns:
 *   -1 when lex_fatal is set, or at EOF with no include file left to pop
 *      (after reporting any still-open #if via lexerror());
 *   a token code (F_* constant, the raw character for single-character
 *      tokens, or whatever number()/real()/string()/ident()/lookup_resword()
 *      yield);
 *   ' ' after reporting an illegal character.
 *
 * Whitespace, comments, '#' directives and expanded defines produce no
 * token; the loop simply continues with the next character.
 *
 * TRY, SAVEC, gobble() and isalunum() are defined elsewhere.  From their
 * use here, TRY(ch, tok) presumably returns tok when the next character is
 * ch, and SAVEC appends c to yytext through yyp -- NOTE(review): confirm
 * against the definitions (in particular whether SAVEC bounds-checks).
 */
static int yylex1(void)
{
    register char *yyp;
    register int c;
    register int c1, c2;

    for (;;) {
        if (lex_fatal) {
            return -1;
        }
        switch(c = mygetc()) {
        case EOF:
            /* End of an included file: restore the includer's saved state. */
            if (inctop) {
                struct incstate *p;
                p = inctop;
                (void)fclose(yyin);
                /*(void)fprintf(stderr, "popping to %s\n", p->file);*/
                free(current_file);
                nexpands = 0;
                current_file = p->file;
                current_line = p->line + 1;
                current_incfile = p->incfnum;
                pragma_strict_types = p->pragma_strict_types;
                yyin = p->yyin;
                slast = p->slast;
                lastchar = p->lastchar;
                inctop = p->next;
                /* Restore pending pushed-back input at the tail of defbuf. */
                if (p->nbuf) {
                    nbuf = p->nbuf;
                    outp = defbuf + DEFMAX - nbuf;
                    memcpy(outp, p->outp, nbuf);
                    free((char *)p->outp);
                } else {
                    nbuf = 0;
                    outp = defbuf + DEFMAX;
                }
                store_line_number_info(current_incfile, current_line);
                incdepth--;
                free((char *)p);
                break;
            }
            /* Real end of input: complain about and discard open #if's. */
            if (iftop) {
                struct ifstate *p = iftop;
                lexerror(p->state == EXPECT_ENDIF ? "Missing #endif" : "Missing #else");
                while (iftop) {
                    p = iftop;
                    iftop = p->next;
                    free((char *)p);
                }
            }
            return -1;
        case '\n':
            {
                nexpands=0;
                store_line_number_info(current_incfile, current_line);
                current_line++;
                total_lines++;
            }
            /* FALLTHROUGH */
        case ' ':
        case '\t':
        case '\f':
        case '\v':
            break;
        /* Operators: try the multi-character forms, else the bare char. */
        case '+':
            TRY('+', F_INC);
            TRY('=', F_ADD_EQ);
            return c;
        case '-':
            TRY('>', F_ARROW);
            TRY('-', F_DEC);
            TRY('=', F_SUB_EQ);
            return c;
        case '&':
            TRY('&', F_LAND);
            TRY('=', F_AND_EQ);
            return c;
        case '|':
            TRY('|', F_LOR);
            TRY('=', F_OR_EQ);
            return c;
        case '^':
            TRY('=', F_XOR_EQ);
            return c;
        case '<':
            if (gobble('<')) {
                TRY('=', F_LSH_EQ);
                return F_LSH;
            }
            TRY('=', F_LE);
            return c;
        case '>':
            if (gobble('>')) {
                TRY('=', F_RSH_EQ);
                return F_RSH;
            }
            TRY('=', F_GE);
            return c;
        case '*':
            TRY('=', F_MULT_EQ);
            return c;
        case '%':
            TRY('=', F_MOD_EQ);
            return F_MOD;
        case '/':
            /* Both comment styles are skipped and yield no token. */
            if (gobble('*')) {
                skip_comment();
                break;
            } else if (gobble('/')) {
                skip_comment2();
                break;
            }
            TRY('=', F_DIV_EQ);
            return c;
        case '=':
            TRY('=', F_EQ);
            return c;
        case ';':
        case '(':
        case ')':
        case ',':
        case '{':
        case '}':
        case '~':
        case '[':
        case ']':
        case '?':
        case '@':
            return c;
        case '!':
            TRY('=', F_NE);
            return F_NOT;
        case ':':
            TRY(':', F_COLON_COLON);
            return ':';
        case '.':
            /* ".." is a range, "..." is varargs. */
            if (gobble('.')) {
                if (gobble('.'))
                    return F_VARARG;
                else
                    return F_RANGE;
            }
            return c;
        case '#':
            /* A '#' is a directive only when lastchar is a newline. */
            if (lastchar == '\n') {
                char *ssp = 0;
                int quote;

                yyp = yytext;
                do {
                    c = mygetc();
                } while (isspace(c));
                /*
                 * Collect the rest of the line into yytext.  ssp marks the
                 * first whitespace, i.e. the end of the directive name.
                 * Block comments outside double quotes are skipped.
                 */
                for (quote = 0;;) {
                    if (c == '"')
                        quote ^= 1;
                    /*gc - handle comments cpp-like! 1.6.91 @@@*/
                    while (!quote && c == '/') {
                        if (gobble('*')) {
                            skip_comment();
                            c = mygetc();
                        } else
                            break;
                    }
                    if (!ssp && isspace(c))
                        ssp = yyp;
                    if (c == '\n' || c == EOF)
                        break;
                    SAVEC;
                    c = mygetc();
                }
                /*
                 * Split "name args": terminate the name and advance ssp to
                 * the first non-blank character of the argument text.
                 * NOTE(review): when the name is followed directly by the
                 * newline, ssp == yyp here, so ssp ends up one past the
                 * terminator written by "*yyp = 0" below -- confirm that
                 * what follows in yytext is safe to scan.
                 */
                if (ssp) {
                    *ssp++ = 0;
                    while (isspace(*ssp))
                        ssp++;
                } else {
                    ssp = yyp;
                }
                *yyp = 0;
                if (strcmp("define", yytext) == 0) {
                    handle_define(ssp);
                } else if (strcmp("if", yytext) == 0) {
#if 0
                    short int nega=0; /*@@@ allow #if !VAR gc 1.6.91*/
                    if (*ssp=='!'){ ssp++; nega=1;}
                    if (isdigit(*ssp)) {
                        char *p;
                        long l;
                        l = strtol(ssp, &p, 10);
                        while (isspace(*p))
                            p++;
                        if (*p)
                            lexerror("Condition too complex in #if");
                        else
                            handle_cond(nega ? !(int)l : (int)l);
                    } else if (isalunum(*ssp)) {
                        char *p = ssp;
                        while (isalunum(*p))
                            p++;
                        if (*p) {
                            *p++ = 0;
                            while (isspace(*p))
                                p++;
                        }
                        if (*p)
                            lexerror("Condition too complex in #if");
                        else {
                            struct defn *d;
                            d = lookup_define(ssp);
                            if (d) {
                                handle_cond(nega ? !atoi(d->exps) : atoi(d->exps));/* a hack! */
                            } else {
                                handle_cond(nega?1:0); /* cpp-like gc*/
                            }
                        }
                    } else
                        lexerror("Condition too complex in #if");
#else
                    int cond;

                    /*
                     * Push a 0 sentinel followed by the expression text back
                     * onto the input, evaluate it, and require that the
                     * evaluator stopped exactly at the sentinel.
                     */
                    myungetc(0);
                    add_input(ssp);
                    cond = cond_get_exp(0);
                    if (mygetc()) {
                        lexerror("Condition too complex in #if");
                        /* Drain the leftover text up to the sentinel. */
                        while (mygetc())
                            ;
                    } else
                        handle_cond(cond);
#endif
                } else if (strcmp("ifdef", yytext) == 0) {
                    deltrail(ssp);
                    handle_cond(lookup_define(ssp) != 0);
                } else if (strcmp("ifndef", yytext) == 0) {
                    deltrail(ssp);
                    handle_cond(lookup_define(ssp) == 0);
                } else if (strcmp("else", yytext) == 0) {
                    /* Reached #else of a taken branch: skip to #endif. */
                    if (iftop && iftop->state == EXPECT_ELSE) {
                        struct ifstate *p = iftop;

                        /*(void)fprintf(stderr, "found else\n");*/
                        iftop = p->next;
                        free((char *)p);
                        (void)skip_to("endif", (char *)0);
                        store_line_number_info(current_incfile, current_line);
                        current_line++;
                        total_lines++;
                    } else {
                        lexerror("Unexpected #else");
                    }
                } else if (strcmp("endif", yytext) == 0) {
                    if (iftop && (iftop->state == EXPECT_ENDIF ||
                                  iftop->state == EXPECT_ELSE)) {
                        struct ifstate *p = iftop;

                        /*(void)fprintf(stderr, "found endif\n");*/
                        iftop = p->next;
                        free((char *)p);
                    } else {
                        lexerror("Unexpected #endif");
                    }
                } else if (strcmp("undef", yytext) == 0) {
                    struct defn *d;

                    deltrail(ssp);
                    /* The define is only flagged, not removed. */
                    if ((d = lookup_define(ssp)) != NULL )
                        d->undef++;
                } else if (strcmp("echo", yytext) == 0) {
                    (void)fprintf(stderr, "%s\n", ssp);
                } else if (strcmp("include", yytext) == 0) {
                    /*(void)fprintf(stderr, "including %s\n", ssp); */
                    handle_include(ssp, 0);
                } else if (strcmp("pragma", yytext) == 0) {
                    deltrail(ssp);
                    handle_pragma(ssp);
                } else if (strcmp("error", yytext) == 0) {
                    handle_exception(ERROR, ssp);
                } else if (strcmp("warning", yytext) == 0) {
                    handle_exception(WARNING, ssp);
                } else {
                    lexerror("Unrecognised # directive");
                }
                /* Hand the line's newline back to the main loop. */
                myungetc('\n');
                break;
            } else
                goto badlex;
        case '\'':
            /* Character constant, with a small set of backslash escapes. */
            yylval.number = mygetc();
            if (yylval.number == '\\') {
                int tmp = mygetc();
                switch (tmp) {
                case 'n': yylval.number = '\n'; break;
                case 't': yylval.number = '\t'; break;
                case 'b': yylval.number = '\b'; break;
                case 'a': yylval.number = '\a'; break;
                case 'v': yylval.number = '\v'; break;
                case '\'':
                case '\\':
                case '"':
                    yylval.number = tmp; break;
                default:
                    lexwarning("Bad character escape sequence");
                    yylval.number = tmp;
                    break;
                }
            }
            if (!gobble('\''))
                lexerror("Illegal character constant");
            return F_NUMBER;
        case '"':
            /* String literal: collected into yytext including both quotes. */
            yyp = yytext;
            *yyp++ = c;
            for (;;) {
                c = mygetc();
                if (c == EOF) {
                    lexerror("End of file in string");
                    return string("\"\"");
                } else if (c == '\n') {
                    lexerror("Newline in string");
                    return string("\"\"");
                }
                SAVEC;
                if (c == '"')
                    break;
                if (c == '\\') {
                    c = mygetc();
                    if ( c == '\n' ) {
                        /* Backslash-newline: drop the saved backslash. */
                        yyp--;
                        store_line_number_info(current_incfile, current_line);
                        current_line++;
                        total_lines++;
                    } else if ( c == EOF ) {
                        /* some operating systems give EOF only once */
                        myungetc(c);
                    } else
                        *yyp++ = c;
                }
            }
            *yyp = 0;
            return string(yytext);

        case '0':
            /* 0x / 0X selects base 16, 0o selects base 8. */
            c = mygetc();
            if ( c == 'X' || c == 'x' || c == 'o') {
                char *endptr;
                long long value;
                int base = 16;
                if (c == 'o')
                    base = 8;

                yyp = yytext;

                /* Hex digits are collected for either base; strtoll()
                 * below stops at the first digit invalid for 'base'. */
                for (;;) {
                    c = mygetc();
                    if (!isxdigit(c))
                        break;
                    SAVEC;
                }
                myungetc(c);
                *yyp = '\0';

                value = strtoll(yytext, &endptr, base);
                if (*endptr != '\0') {
                    fprintf(stderr, "%s\n", yytext);
                    lexwarning("Invalid digits in octal number number");
                }

                return number(value);
            }
            /* Plain leading zero: put the peeked char back, lex as decimal. */
            myungetc(c);
            c = '0';
            /* FALLTHROUGH */
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            yyp = yytext;
            *yyp++ = c;
            for (;;) {
                c = mygetc();
                if (!isdigit(c))
                    break;
                SAVEC;
            }
            /* A '.' followed by a digit makes this a real number. */
            if (c == '.') {
                if (isdigit(c1 = mygetc())) {
                    SAVEC;
                    c = c1;
                    SAVEC;
                    for (c = mygetc(); isdigit(c); c = mygetc())
                        SAVEC;
                    /* Optional exponent; accepted only if digits follow. */
                    if (c == 'e' || c == 'E') {
                        c1 = mygetc();
                        if (c1 == '-' || c1 == '+') {
                            c2 = mygetc();
                            if (isdigit(c2)) {
                                SAVEC;
                                c = c1;
                                SAVEC;
                                c = c2;
                                SAVEC;
                                for (c = mygetc(); isdigit(c); c = mygetc())
                                    SAVEC;
                            } else {
                                /* Not an exponent: push back in reverse
                                 * order; c ('e'/'E') is pushed below. */
                                myungetc(c2);
                                myungetc(c1);
                            }
                        } else if (isdigit(c1)) {
                            SAVEC;
                            c = c1;
                            SAVEC;
                            for (c = mygetc(); isdigit(c); c = mygetc())
                                SAVEC;
                        } else
                            myungetc(c1);
                    }
                    myungetc(c);
                    *yyp = 0;
                    return real(strtod(yytext, NULL));
                }
                /* '.' not followed by a digit (e.g. a range operator). */
                myungetc(c1);
            }
            myungetc(c);
            *yyp = 0;
            if (*yytext == '0') {
                /* OCTALS */
                char *endptr;
                long long value;

                value = strtoll(yytext, &endptr, 010);

                if (*endptr != '\0')
                    lexwarning("Invalid digits in octal number");

                if (value != 0)
                    lexwarning("Obsolete octal format used. Use 0o111 syntax");

                return number(value);
            }
            return number(atoll(yytext));
        default:
            /* Identifier, reserved word, or a define to be expanded. */
            if (isalpha(c) || c == '_') {
                int r;

                yyp = yytext;
                *yyp++ = c;
                for (;;) {
                    c = mygetc();
                    if (!isalunum(c))
                        break;
                    SAVEC;
                }
                *yyp = 0;

                myungetc(c);
                /* When expand_define() returns non-zero no token is
                 * returned here and the loop simply continues. */
                if (!expand_define()) {
                    r = lookup_resword(yytext);
                    if (r >= 0) {
                        return r;
                    } else
                        return ident(yytext);
                }
                break;
            }
            goto badlex;
        }
    }
  badlex:
    {
        lexerror("Illegal character (hex %02x) '%c'", c, c);
        return ' ';
    }
}
int handle_include(char *name, int ignore_errors) { char *p; char buf[1024]; FILE *f; struct incstate *is; int delim; if (*name != '"' && *name != '<') { struct defn *d; if ((d = lookup_define(name)) && d->nargs == -1) { char *q; q = d->exps; while (isspace(*q)) q++; return handle_include(q, ignore_errors); } else { if (!ignore_errors) lexerror("Missing leading \" or < in #include"); return 0; } } delim = *name++ == '"' ? '"' : '>'; for (p = name; *p && *p != delim; p++) ; if (!*p) { if (!ignore_errors) lexerror("Missing trailing \" or > in #include"); return 0; } if (strlen(name) > sizeof(buf) - 100) { if (!ignore_errors) lexerror("Include name too long."); return 0; } *p = 0; if ((f = inc_open(buf, sizeof(buf), name)) == NULL) { if (!ignore_errors) { lexerror("Cannot #include %s\n", name); } return 0; } is = (struct incstate *)xalloc(sizeof(struct incstate)); is->yyin = yyin; is->line = current_line; is->file = current_file; is->incfnum = current_incfile; is->slast = slast; is->lastchar = lastchar; is->next = inctop; is->pragma_strict_types = pragma_strict_types; if (nbuf) { memcpy(is->outp = (char *)xalloc(nbuf + 1), outp, nbuf); is->nbuf = nbuf; nbuf = 0; outp = defbuf + DEFMAX; } else { is->nbuf = 0; is->outp = NULL; } pragma_strict_types = 0; inctop = is; current_line = 1; current_file = xalloc(strlen(buf)+1); current_incfile = ++num_incfiles; (void)strcpy(current_file, buf); slast = lastchar = '\n'; yyin = f; incdepth++; return 1; }
/*
 * Run the preprocessor over preproc->input, writing to preproc->output.
 *
 * Tokens are read one at a time:
 *   - an ID that names a macro is expanded through handle_macro(); a
 *     function-style macro name not followed by '(' is printed unchanged;
 *   - any other ID is printed unchanged;
 *   - a '#' starts a directive.  define, undef, include and ifdef are
 *     handled; every other directive name is currently accepted and ignored
 *     (its name is released and the rest of the line is checked for stray
 *     tokens).
 *
 * One token of look-ahead is implemented by saving the lexer state and the
 * file position, reading with a NULL output, then restoring both.
 *
 * NOTE(review): the code after each preprocessor_error() call assumes that
 * function does not return -- confirm.
 * NOTE(review): ftell/fseek failures terminate with exit(0); a non-zero
 * status looks more appropriate, left unchanged here for callers.
 */
void preprocess_file(preprocessor_state* preproc)
{
	lexer_state save_lexer, *lexer = &preproc->lexer;
	long fpos;
	int c;
	token_lex tlex[10];

	FILE* input = preproc->input;
	FILE* output = preproc->output;

	// if not -P
	fprintf(output, "# 0 \"%s\"\n", lexer->cur_file);

	tlex[0] = read_token(input, lexer, output);
	for ( ; tlex[0].tok.type != END; tlex[0] = read_token(input, lexer, output)) {

		if (tlex[0].tok.type == ID) {
			int is_macro = look_up_macro_loc(preproc, tlex[0].tok.v.id);
			if (is_macro >= 0) {
				macro_params* p = GET_PARAM(&preproc->params, is_macro);

				//for a function style macro if the name isn't followed by a ( it's not an error
				//it's just not an invocation so just print it out
				if (p->num_params >= 0) {
					save_lexer = *lexer;
					if ((fpos = ftell(input)) == -1) {
						perror("ftell failure in preprocess_file");
						exit(0);
					}
					tlex[1] = read_token(input, lexer, NULL);

					if (tlex[1].tok.type != LPAREN) {
						*lexer = save_lexer;
						if (fseek(input, fpos, SEEK_SET)) {
							perror("fseek failure in preprocess_file");
							exit(0);
						}
						if (tlex[1].tok.type == ID || tlex[1].tok.type == STR_LITERAL)
							free(tlex[1].tok.v.id);

						print_token(&tlex[0].tok, output, 0);
						free(tlex[0].tok.v.id);
						continue;
					}

					//restore ( for symmetry in handle_macro
					*lexer = save_lexer;
					if (fseek(input, fpos, SEEK_SET)) {
						perror("fseek failure in preprocess_file");
						exit(0);
					}
				}
				handle_macro(preproc, is_macro);
			} else {
				print_token(&tlex[0].tok, output, 0);
			}
			free(tlex[0].tok.v.id);

		} else if (tlex[0].tok.type == POUND) {
			char* directive;

			//has to be the first token
			if (lexer->cur_tok != 1) {
				preprocessor_error(NULL, lexer, "Error: stray # in program\n");
			}

			save_lexer = *lexer;
			if ((fpos = ftell(input)) == -1) {
				perror("ftell failure in preprocess_file");
				exit(0);
			}
			tlex[1] = read_token(input, lexer, NULL);

			if (tlex[0].line != tlex[1].line) {
				//null directive (EOF is always on its own line)
				//
				//Whatever follows belongs to a later line, so rewind and let
				//the main loop read it again. This includes a '#' starting
				//the next directive: storing it in tlex[0] before continue
				//would be overwritten by the loop increment and lose it.
				if (tlex[1].tok.type == ID || tlex[1].tok.type == STR_LITERAL) {
					free(tlex[1].tok.v.id);
				}
				*lexer = save_lexer;
				if (fseek(input, fpos, SEEK_SET)) {
					perror("fseek failure in preprocess_file");
					exit(0);
				}
				continue;
			}

			if (tlex[1].tok.type != ID) {
				preprocessor_error(&tlex[1], lexer, "expected ID as macro name,");
			}

			//the directive name is owned here and released exactly once
			//on every path below
			directive = tlex[1].tok.v.id;

			if (!strcmp(directive, "define")) {
				free(directive);
				handle_define(preproc);

			} else if (!strcmp(directive, "undef")) {
				free(directive);

				tlex[1] = read_token(input, lexer, NULL);
				if (tlex[1].line != tlex[0].line) {
					preprocessor_error(NULL, lexer, "macro name missing in undef directive\n");
				}
				if (tlex[1].tok.type != ID) {
					preprocessor_error(&tlex[1], lexer, "ID expected in undef directive,");
				}

				//look up the macro NAME (tlex[1]); tlex[0] is the '#' token
				int exists = look_up_macro_loc(preproc, tlex[1].tok.v.id);
				if (exists >= 0) {
					cvec_erase_str(&preproc->macros, exists, exists);
					cvec_erase_str(&preproc->values, exists, exists);
					cvec_erase_void(&preproc->params, exists, exists);
				}
				free(tlex[1].tok.v.id);

			} else if (!strcmp(directive, "include")) {
				free(directive);
				handle_include(preproc);

			} else if (!strcmp(directive, "ifdef")) {
				free(directive);
				handle_ifdef(preproc);

			} else {
				//Not implemented yet: if, ifndef, else, endif, elif, defined,
				//pragma, error -- and any unknown name. They are accepted
				//and ignored; only the name is released.
				free(directive);
			}

			//eat the newline
			//the newline should reset the position
			//but since the line of a directive should be
			//invisible to the final program it shouldn't affect line count
			for (;;) {
				c = getc(preproc->input);
				//stop at EOF too: a directive on an unterminated last line
				//must not loop forever
				if (c == '\n' || c == EOF)
					break;
				if (!isspace(c))
					preprocessor_error(NULL, lexer, "extra tokens in directive\n");
			}
			lexer->cur_pos = 1;
			lexer->cur_tok = 0;
		}
	}
}