// Tokenizer/driver loop: reads characters from `in` and dispatches to the
// expression and symbol callbacks until one complete top-level item has
// been consumed.
//
// The loop returns as soon as
//  - a ')' closes the outermost open expression,
//  - a symbol (simple, quoted, or string literal) is seen outside of any
//    parentheses, or
//  - a ')' is seen while no expression is open (an error that is not
//    reported here).
// It also ends, without any diagnostics, when the input is exhausted.
void smt2_parsert::operator()()
{
  // Number of '(' seen that have not yet been matched by a ')'.
  unsigned depth=0;
  char c;

  while(in.get(c))
  {
    // Whitespace only separates tokens.
    if(c==' ' || c=='\n' || c=='\r' || c=='\t')
      continue;

    if(c==';')
    {
      // Comment: discard everything up to and including the newline
      // (or up to the end of the input).
      while(in.get(c))
      {
        if(c=='\n')
          break;
      }

      continue;
    }

    if(c=='(')
    {
      // Start of a sub-expression.
      ++depth;
      open_expression();
      continue;
    }

    if(c==')')
    {
      // Unexpected ')'. This is an error, but it is not reported.
      if(depth==0)
        return;

      // End of a sub-expression.
      --depth;
      close_expression();
    }
    else
    {
      // Some kind of symbol; the first character selects the reader.
      if(c=='|')
        get_quoted_symbol();
      else if(c=='"')
        get_string_literal();
      else
        get_simple_symbol(c); // likely a simple symbol

      symbol();
    }

    // A closed expression or a symbol at nesting depth zero completes
    // the top-level item.
    if(depth==0)
      return;
  }

  // End of input. With depth==0 nothing was read at all (blank file);
  // otherwise the input ended in the middle of an expression, which is
  // an error. Neither situation is reported here.
}
/* Split the first preprocessing token off the front of SRC.

   On success TOK is set to refer to that token and SRC is advanced so
   that it refers to the text following the token.  SRC must be a
   shared buffer; TOK ends up shared as well, pointing into the same
   string as SRC.  TOK's last_token field is initialized.

   Return non-zero if a token was found, or 0 if SRC holds no more
   tokens.  */
static int
get_token (struct macro_buffer *tok, struct macro_buffer *src)
{
  char *cursor = src->text;
  char *limit = cursor + src->len;

  gdb_assert (src->shared);

  /* ISO/IEC 9899:1999 (E), section 6.4, defines:

       preprocessing-token:
         header-name
         identifier
         pp-number
         character-constant
         string-literal
         punctuator
         each non-white-space character that cannot be one of the above

     header-name tokens need no handling: they can only follow a
     #include, which we will never see.  */

  while (cursor < limit)
    {
      int consumed;

      /* Whitespace and comments are skipped; they never become
         tokens.  */
      if (macro_is_whitespace (*cursor))
        {
          cursor++;
          continue;
        }

      if (get_comment (tok, cursor, limit))
        {
          cursor += tok->len;
          continue;
        }

      /* Try each token class in turn.

         Note: the grammar in the standard seems to be ambiguous:
         L'x' can be either a wide character constant, or an
         identifier followed by a normal character constant.  Trying
         get_identifier only after get_character_constant and
         get_string_literal gives the wide character syntax
         precedence.  Since GDB doesn't handle wide character
         constants anyway, is this the right thing to do?  */
      if (! (get_pp_number (tok, cursor, limit)
             || get_character_constant (tok, cursor, limit)
             || get_string_literal (tok, cursor, limit)
             || get_identifier (tok, cursor, limit)
             || get_punctuator (tok, cursor, limit)))
        {
          /* A "non-white-space character that cannot be one of the
             above": make a one-character token out of it.  */
          set_token (tok, cursor, cursor + 1);
        }

      /* Characters consumed from SRC, including any whitespace and
         comments skipped ahead of the token.  */
      consumed = cursor - src->text + tok->len;
      src->text += consumed;
      src->len -= consumed;
      return 1;
    }

  return 0;
}
static void process_ucpp_token(struct ucpp_token *tok) { int *dummyptr = n_xmalloc(sizeof *dummyptr); static struct input_file infile; #if 0 NONE, /* whitespace */ NEWLINE, /* newline */ COMMENT, /* comment */ BUNCH, /* non-C characters */ PRAGMA, /* a #pragma directive */ CONTEXT, /* new file or #line */ SHARP, /* # */ DSHARP, /* ## */ OPT_NONE, /* optional space to separate tokens in text output */ DIGRAPH_TOKENS, /* there begin digraph tokens */ /* for DIG_*, do not change order, unless checking undig() in cpp.c */ DIG_LBRK, /* <: */ DIG_RBRK, /* :> */ DIG_LBRA, /* <% */ DIG_RBRA, /* %> */ DIG_SHARP, /* %: */ DIG_DSHARP, /* %:%: */ DIGRAPH_TOKENS_END, /* digraph tokens end here */ LAST_MEANINGFUL_TOKEN, /* reserved words will go there */ MACROARG, /* special token for representing macro arguments */ #endif switch (tok->type) { case LPAR: /* ( */ case RPAR: /* ) */ store_token(&toklist, dummyptr, tok->type == LPAR? TOK_PAREN_OPEN : TOK_PAREN_CLOSE, lineno, NULL); break; case LBRA: /* { */ case RBRA: /* } */ store_token(&toklist, dummyptr, tok->type == LBRA? TOK_COMP_OPEN: TOK_COMP_CLOSE, lineno, NULL); break; case LBRK: /* [ */ case RBRK: /* ] */ store_token(&toklist, dummyptr, tok->type == LBRK? TOK_ARRAY_OPEN : TOK_ARRAY_CLOSE, lineno, NULL); break; case SEMIC: /* ; */ store_token(&toklist, dummyptr, TOK_SEMICOLON, lineno, NULL); break; case STRING: { struct ty_string *tmpstr; char *str_value = tok->name; int is_wide_char = 0; if (*str_value == 'L') { /* Wide character string */ ++str_value; is_wide_char = 1; } set_input_file_buffer(&infile, str_value+1); /* +1 to skip opening " */ tmpstr = get_string_literal(&infile, is_wide_char); /* not wide char?? 
*/ store_token(&toklist, tmpstr, TOK_STRING_LITERAL, lineno, NULL); break; } case NAME: { /* identifier */ char *ident; set_input_file_buffer(&infile, tok->name+1); ident = get_identifier(*tok->name, &infile); if (ident != NULL) { store_token(&toklist, ident, TOK_IDENTIFIER, lineno, n_xstrdup(tok->name)); } break; } case CHAR: { int err = 0; int is_wide_char = 0; char *str_value = tok->name; int tmpi; if (*str_value == 'L') { /* Wide character string */ ++str_value; is_wide_char = 1; } set_input_file_buffer(&infile, str_value+1); tmpi = get_char_literal(&infile, &err); /* XXX cross-comp */ if (!err) { int char_type; /* * Character literals are really treated * like integer constants */ int *tmpip = zalloc_buf(Z_CEXPR_BUF); /*n_xmalloc(16);*/ /* XXX */ if (tmpip == NULL) { perror("malloc"); exit(EXIT_FAILURE); } *tmpip = tmpi; if (is_wide_char) { char_type = backend->get_wchar_t()->code; /* * The assignment above assumes int, * i.e. 32bit on all supported * platforms */ assert(backend->get_sizeof_type( backend->get_wchar_t(), NULL) == 4); } else { char_type = TY_INT; } store_token(&toklist, tmpip, char_type, lineno, n_xstrdup(tok->name)); } is_wide_char = 0; break; } case NUMBER: { struct num *n; set_input_file_buffer(&infile, tok->name+1); n = get_num_literal(*tok->name, &infile); if (n != NULL) { store_token(&toklist, n->value, n->type, lineno, n_xstrdup(tok->name)); } else { lexerror("Couldn't read numeric literal"); } break; } case MDOTS: { int *tmp = n_xmalloc(sizeof *tmp); *tmp = 0; store_token(&toklist, tmp, TOK_ELLIPSIS, lineno, NULL); break; } case SLASH: /* / */ case ASSLASH: /* /= */ case MINUS: /* - */ case MMINUS: /* -- */ case ASMINUS: /* -= */ case ARROW: /* -> */ case PLUS: /* + */ case PPLUS: /* ++ */ case ASPLUS: /* += */ case LT: /* < */ case LEQ: /* <= */ case LSH: /* << */ case ASLSH: /* <<= */ case GT: /* > */ case GEQ: /* >= */ case RSH: /* >> */ case ASRSH: /* >>= */ case ASGN: /* = */ case SAME: /* == */ #ifdef CAST_OP case CAST: /* => */ 
#endif case NOT: /* ~ */ case NEQ: /* != */ case AND: /* & */ case LAND: /* && */ case ASAND: /* &= */ case OR: /* | */ case LOR: /* || */ case ASOR: /* |= */ case PCT: /* % */ case ASPCT: /* %= */ case STAR: /* * */ case ASSTAR: /* *= */ case CIRC: /* ^ */ case ASCIRC: /* ^= */ case LNOT: /* ! */ case COMMA: /* , */ case QUEST: /* ? */ case COLON: /* : */ case DOT: /* . */ case UPLUS: /* unary + */ case UMINUS: /* unary - */ { int *opval = n_xmalloc(sizeof *opval); char *optext = operators_name[tok->type]; set_input_file_buffer(&infile, optext+1); *opval = get_operator(*optext, &infile, &optext); /* XXX cross-comp */ if (*opval == -1) { lexerror("Invalid operator `%s'", optext); free(opval); } else { store_token(&toklist, opval, TOK_OPERATOR, lineno, tok->name); } break; } default: printf("Unhandled token, type %d, value %s\n", tok->type, tok->name? tok->name: "?"); break; } return NULL; }