/* Parse the (possibly empty) parameter list of a function-like macro
   definition, registering every parameter name with MACRO through
   _cpp_save_parameter.

   Scanning begins one character past CUR (pfile->context).  Accepted
   forms are ")" alone (only while MACRO has no parameters yet) or a
   comma-separated list of identifiers closed by ")".  Whitespace and
   comments between the pieces are skipped.

   Returns true on success.  Returns false, after reporting
   "syntax error in macro parameter list", on a malformed list or when
   _cpp_save_parameter rejects a name (a duplicate parameter).

   On success CUR (pfile->context) is left just past the closing
   parenthesis; on failure it is left at the point where scanning
   stopped.  pfile->out.cur is destroyed.  */
static bool
scan_parameters (cpp_reader *pfile, cpp_macro *macro)
{
  const uchar *pos = CUR (pfile->context) + 1;
  bool valid = false;
  bool expect_more;

  do
    {
      expect_more = false;
      pos = skip_whitespace (pfile, pos, true /* skip_comments */);

      if (is_idstart (*pos))
	{
	  cpp_hashnode *name = lex_identifier (pfile, pos);

	  /* A rejected parameter ends the scan with POS still on the
	     offending identifier and VALID still false.  */
	  if (!_cpp_save_parameter (pfile, macro, name))
	    {
	      /* lex_identifier moved the context cursor; resume there.  */
	      pos = skip_whitespace (pfile, CUR (pfile->context),
				     true /* skip_comments */);
	      if (*pos == ',')
		{
		  pos++;
		  expect_more = true;
		}
	      else
		valid = (*pos == ')');
	    }
	}
      else
	{
	  /* Not an identifier: only an immediately closed, empty list
	     is acceptable here (so "a,)" is rejected).  */
	  valid = (*pos == ')' && macro->paramc == 0);
	}
    }
  while (expect_more);

  if (!valid)
    cpp_error (pfile, CPP_DL_ERROR, "syntax error in macro parameter list");

  /* Step over the closing parenthesis when we stopped on one.  */
  CUR (pfile->context) = pos + (*pos == ')');

  return valid;
}
/* Copies the next logical line in the current buffer (starting at
   buffer->cur) to the output buffer.  The output is guaranteed to
   terminate with a NUL character.  buffer->cur is updated.

   If MACRO is non-NULL, then we are scanning the replacement list of
   MACRO, and we call save_replacement_text() every time we meet an
   argument.

   Returns true unless the line turned out to be a directive (either a
   null directive or one handed to _cpp_handle_directive), in which
   case false is returned.

   While scanning, macro names that are eligible for expansion are
   replaced in the output: object-like macros immediately, function-like
   macros once their closing parenthesis has been seen.  If the line
   ends while a function-like macro's argument list is still open, an
   "unterminated argument list" error is reported.

   NOTE(review): the NUL termination promised above is not written
   explicitly in this function; presumably it comes from the input
   buffer or a helper -- confirm.  */
bool
_cpp_scan_out_logical_line (cpp_reader *pfile, cpp_macro *macro)
{
  bool result = true;
  cpp_context *context;
  const uchar *cur;
  uchar *out;
  struct fun_macro fmacro;
  /* QUOTE is the character that will close the literal currently being
     scanned ('"', '\'' or '>'), or 0 when outside any literal.  */
  unsigned int c, paren_depth = 0, quote;
  enum ls lex_state = ls_none;
  /* True while '<' may still open an angled header name.  */
  bool header_ok;
  const uchar *start_of_input_line;

  /* fmacro.buff doubles as the "something to release" flag at DONE.  */
  fmacro.buff = NULL;

  quote = 0;
  header_ok = pfile->state.angled_headers;
  CUR (pfile->context) = pfile->buffer->cur;
  RLIMIT (pfile->context) = pfile->buffer->rlimit;
  pfile->out.cur = pfile->out.base;
  pfile->out.first_line = pfile->line;
  /* start_of_input_line is needed to make sure that directives really,
     really start at the first character of the line.  */
  start_of_input_line = pfile->buffer->cur;

  /* Re-entered whenever pfile->context may have changed (after popping
     a context or pushing replacement text): reload the read cursor from
     the current context and the write cursor from pfile->out.cur.  */
 new_context:
  context = pfile->context;
  cur = CUR (context);
  check_output_buffer (pfile, RLIMIT (context) - cur);
  out = pfile->out.cur;

  for (;;)
    {
      /* Only in the base context (no previous context): once CUR has
	 reached the position of the next pending line note, let
	 _cpp_process_line_notes deal with it.  */
      if (!context->prev
	  && cur >= pfile->buffer->notes[pfile->buffer->cur_note].pos)
	{
	  pfile->buffer->cur = cur;
	  _cpp_process_line_notes (pfile, false);
	}

      /* Every character is copied to the output first; the cases below
	 rewind OUT when the character must not stay there.  */
      c = *cur++;
      *out++ = c;

      /* Whitespace should "continue" out of the switch,
	 non-whitespace should "break" out of it.  */
      switch (c)
	{
	case ' ':
	case '\t':
	case '\f':
	case '\v':
	case '\0':
	  continue;

	case '\n':
	  /* If this is a macro's expansion, pop it.  */
	  if (context->prev)
	    {
	      pfile->out.cur = out - 1;
	      _cpp_pop_context (pfile);
	      goto new_context;
	    }

	  /* Omit the newline from the output buffer.  */
	  pfile->out.cur = out - 1;
	  pfile->buffer->cur = cur;
	  pfile->buffer->need_line = true;
	  pfile->line++;

	  /* A function-like macro invocation may continue on the next
	     line, but not inside a directive and only if another line
	     can be fetched.  */
	  if ((lex_state == ls_fun_open || lex_state == ls_fun_close)
	      && !pfile->state.in_directive
	      && _cpp_get_fresh_line (pfile))
	    {
	      /* Newlines in arguments become a space, but we don't
		 clear any in-progress quote.  */
	      if (lex_state == ls_fun_close)
		out[-1] = ' ';
	      cur = pfile->buffer->cur;
	      continue;
	    }
	  goto done;

	case '<':
	  /* Only treated as an opening quote while an angled header
	     name is still possible.  */
	  if (header_ok)
	    quote = '>';
	  break;
	case '>':
	  if (c == quote)
	    quote = 0;
	  break;

	case '"':
	case '\'':
	  /* Close the literal if this matches the open quote; open one
	     if no literal is in progress; otherwise it is an ordinary
	     character inside a different kind of literal.  */
	  if (c == quote)
	    quote = 0;
	  else if (!quote)
	    quote = c;
	  break;

	case '\\':
	  /* Skip escaped quotes here, it's easier than above.  */
	  if (*cur == '\\' || *cur == '"' || *cur == '\'')
	    *out++ = *cur++;
	  break;

	case '/':
	  /* Traditional CPP does not recognize comments within
	     literals.  */
	  if (!quote && *cur == '*')
	    {
	      /* copy_comment works through pfile->out.cur, so publish
		 OUT before the call and reload it afterwards.  */
	      pfile->out.cur = out;
	      cur = copy_comment (pfile, cur, macro != 0);
	      out = pfile->out.cur;
	      continue;
	    }
	  break;

	case '_':
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
	case 'y': case 'z':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
	case 'Y': case 'Z':
	  /* Identifiers are only examined when not skipping, and inside
	     a literal only while scanning a macro's replacement list.  */
	  if (!pfile->state.skipping && (quote == 0 || macro))
	    {
	      cpp_hashnode *node;
	      /* Where the identifier's first character was just
		 written; used to erase the name from the output.  */
	      uchar *out_start = out - 1;

	      /* Re-lex the whole identifier starting at its first
		 character, then resynchronise both cursors.  */
	      pfile->out.cur = out_start;
	      node = lex_identifier (pfile, cur - 1);
	      out = pfile->out.cur;
	      cur = CUR (context);

	      if (node->type == NT_MACRO
		  /* Should we expand for ls_answer?  */
		  && (lex_state == ls_none || lex_state == ls_fun_open)
		  && !pfile->state.prevent_expansion)
		{
		  /* Macros invalidate MI optimization.  */
		  pfile->mi_valid = false;
		  if (! (node->flags & NODE_BUILTIN)
		      && node->value.macro->fun_like)
		    {
		      /* Function-like: remember the candidate and wait
			 for a '(' before committing to an expansion.  */
		      maybe_start_funlike (pfile, node, out_start, &fmacro);
		      lex_state = ls_fun_open;
		      fmacro.line = pfile->line;
		      continue;
		    }
		  else if (!recursive_macro (pfile, node))
		    {
		      /* Remove the object-like macro's name from the
			 output, and push its replacement text.  */
		      pfile->out.cur = out_start;
		      push_replacement_text (pfile, node);
		      lex_state = ls_none;
		      goto new_context;
		    }
		}
	      else if (macro && (node->flags & NODE_MACRO_ARG) != 0)
		{
		  /* Found a parameter in the replacement text of a
		     #define.  Remove its name from the output.  */
		  pfile->out.cur = out_start;
		  save_replacement_text (pfile, macro, node->value.arg_index);
		  /* Writing restarts at the base of the output buffer.  */
		  out = pfile->out.base;
		}
	      else if (lex_state == ls_hash)
		{
		  lex_state = ls_predicate;
		  continue;
		}
	      else if (pfile->state.in_expression
		       && node == pfile->spec_nodes.n_defined)
		{
		  lex_state = ls_defined;
		  continue;
		}
	    }
	  break;

	case '(':
	  if (quote == 0)
	    {
	      paren_depth++;
	      if (lex_state == ls_fun_open)
		{
		  /* The pending function-like macro has found its '('.
		     A recursive invocation is left unexpanded.  */
		  if (recursive_macro (pfile, fmacro.node))
		    lex_state = ls_none;
		  else
		    {
		      /* Start collecting arguments: depth restarts at
			 1 and the output is rewound to fmacro.offset,
			 which is also recorded in args[0] (presumably
			 the start of the first argument).  */
		      lex_state = ls_fun_close;
		      paren_depth = 1;
		      out = pfile->out.base + fmacro.offset;
		      fmacro.args[0] = fmacro.offset;
		    }
		}
	      else if (lex_state == ls_predicate)
		lex_state = ls_answer;
	      else if (lex_state == ls_defined)
		lex_state = ls_defined_close;
	    }
	  break;

	case ',':
	  /* Only a top-level, unquoted comma separates arguments.  */
	  if (quote == 0 && lex_state == ls_fun_close && paren_depth == 1)
	    save_argument (&fmacro, out - pfile->out.base);
	  break;

	case ')':
	  if (quote == 0)
	    {
	      paren_depth--;
	      if (lex_state == ls_fun_close && paren_depth == 0)
		{
		  cpp_macro *m = fmacro.node->value.macro;

		  m->used = 1;
		  lex_state = ls_none;
		  save_argument (&fmacro, out - pfile->out.base);

		  /* A single zero-length argument is no argument.  */
		  if (fmacro.argc == 1
		      && m->paramc == 0
		      && out == pfile->out.base + fmacro.offset + 1)
		    fmacro.argc = 0;

		  if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
		    {
		      /* Remove the macro's invocation from the
			 output, and push its replacement text.  */
		      pfile->out.cur = (pfile->out.base
					+ fmacro.offset);
		      /* Save the read position in this context before
			 the context changes.  */
		      CUR (context) = cur;
		      replace_args_and_push (pfile, &fmacro);
		      goto new_context;
		    }
		}
	      else if (lex_state == ls_answer || lex_state == ls_defined_close)
		lex_state = ls_none;
	    }
	  break;

	case '#':
	  if (cur - 1 == start_of_input_line
	      /* A '#' from a macro doesn't start a directive.  */
	      && !pfile->context->prev
	      && !pfile->state.in_directive)
	    {
	      /* A directive.  With the way _cpp_handle_directive
		 currently works, we only want to call it if either we
		 know the directive is OK, or we want it to fail and
		 be removed from the output.  If we want it to be
		 passed through (the assembler case) then we must not
		 call _cpp_handle_directive.  */
	      pfile->out.cur = out;
	      cur = skip_whitespace (pfile, cur, true /* skip_comments */);
	      out = pfile->out.cur;

	      if (*cur == '\n')
		{
		  /* Null directive.  Ignore it and don't invalidate
		     the MI optimization.  */
		  pfile->buffer->need_line = true;
		  pfile->line++;
		  result = false;
		  goto done;
		}
	      else
		{
		  bool do_it = false;

		  if (is_numstart (*cur)
		      && CPP_OPTION (pfile, lang) != CLK_ASM)
		    do_it = true;
		  else if (is_idstart (*cur))
		    /* Check whether we know this directive, but don't
		       advance.  */
		    do_it = lex_identifier (pfile, cur)->is_directive;

		  if (do_it || CPP_OPTION (pfile, lang) != CLK_ASM)
		    {
		      /* This is a kludge.  We want to have the ISO
			 preprocessor lex the next token.  */
		      pfile->buffer->cur = cur;
		      _cpp_handle_directive (pfile, false /* indented */);
		      result = false;
		      goto done;
		    }
		}
	    }

	  /* Not a directive: inside an expression a '#' moves the
	     state machine to ls_hash (an identifier seen next makes it
	     ls_predicate).  */
	  if (pfile->state.in_expression)
	    {
	      lex_state = ls_hash;
	      continue;
	    }
	  break;

	default:
	  break;
	}

      /* Non-whitespace disables MI optimization and stops treating
	 '<' as a quote in #include.  */
      header_ok = false;
      if (!pfile->state.in_directive)
	pfile->mi_valid = false;

      if (lex_state == ls_none)
	continue;

      /* Some of these transitions of state are syntax errors.  The
	 ISO preprocessor will issue errors later.  */
      if (lex_state == ls_fun_open)
	/* Missing '('.  */
	lex_state = ls_none;
      else if (lex_state == ls_hash
	       || lex_state == ls_predicate
	       || lex_state == ls_defined)
	lex_state = ls_none;

      /* ls_answer and ls_defined_close keep going until ')'.  */
    }

 done:
  /* fmacro.buff is non-NULL only if something assigned it after the
     initialisation above (presumably maybe_start_funlike).  */
  if (fmacro.buff)
    _cpp_release_buff (pfile, fmacro.buff);

  /* Still collecting arguments at end of line: the invocation was
     never closed.  Report it against the line recorded when the macro
     name was seen.  */
  if (lex_state == ls_fun_close)
    cpp_error_with_line (pfile, CPP_DL_ERROR, fmacro.line, 0,
			 "unterminated argument list invoking macro \"%s\"",
			 NODE_NAME (fmacro.node));
  return result;
}
/* * Gets the next token from the `lexfile` FILE stream. * Side effects: * - If the TokenType has an associated string, it is found in global `lexstr`. * - If the TokenType has an associated integer value, look in global `lexint`. */ TokenType next_tok(void) { if (!curr_char) eat(); // Eat first char. while (curr_char != EOF) { lo_col = curr_col; // Save first col of the token. // Newline. if (curr_char == '\n') { eat(); return TOK_NL; } // Skip whitespace. if (isspace(curr_char)) { eat(); continue; } // Skip comments until next line. if (curr_char == ';') { do { eat(); } while (curr_char != '\n' && curr_char != EOF); continue; } // id ::= [A-Za-z$_][A-Za-z_$0-9]* // reg ::= r([0-9]|1[0-5])[abcd] | rs | re[0-6] | rk[0-7] // label ::= <nonopcode id>: if (is_idstart(curr_char)) { int i; // TODO: Make this loop prettier. for (i = 0; curr_char != EOF; i++) { lexstr[i] = curr_char; if (is_idcont(peek())) { eat(); } else { break; } } lexstr[++i] = '\0'; eat(); // Advance to next char after the identifier. // Register? if (lexstr[0] == 'r') { // Long or short if (is_long_reg(lexstr)) return TOK_GL_REG; if (is_short_reg(lexstr)) return TOK_GS_REG; // Extra if (is_extra_reg(lexstr)) return TOK_E_REG; // Kernel if (is_kernel_reg(lexstr)) return TOK_K_REG; } // Directive? if (is_dtv(lexstr)) { return TOK_DATA_SEG; } // Instruction? if (isInstruction(lexstr)) return TOK_INSTR; // Label? if (curr_char == ':') { eat(); // Eat the ':' return TOK_LABEL; } // Plain identfier return TOK_ID; } // chr_lit ::= '[^\\']' if (curr_char == '\'') { eat(); // Get inner char. if (curr_char == '\\') { lexstr[0] = escape(eat()); } else { lexstr[0] = curr_char; } lexstr[1] = '\0'; eat(); // Reach the closing quote. // Error: for situations like '\' if (curr_char != '\'') { jas_err("Character literal missing closing quote.", curr_line, lo_col, curr_col); return TOK_UNK; } eat(); // Get rid of ' and advance. 
return TOK_CHR_LIT; } // str_lit ::= "(\\.|[^\\"])*" if (curr_char == '"') { eat(); // Get first char of string. // Let by escape chars, but not single \ or ". int i; for (i = 0; curr_char != '"'; i++) { // Check that we don't close reach EOF before the close ". if (curr_char == EOF) { jas_err("EOF while parsing string literal.", curr_line, curr_col, curr_col); return TOK_UNK; } if (curr_char == '\\') { lexstr[i] = escape(eat()); } else { lexstr[i] = curr_char; } eat(); } lexstr[i] = '\0'; eat(); // Get rid of the " and advance. return TOK_STR_LIT; } // num_lit ::= [+-][1-9][0-9]* | [+-]0[0-7]* | [+-]0x[0-9A-Fa-f]+ // | [+-]0b[01]+ if ((issign(curr_char) && isdigit(peek())) || isdigit(curr_char)) { int base = 10; // Numeric base for interpreting the literal. int chars_read; // Keep track of how many columns we move forward. int sign = +1; // Grab sign if it exists. if (issign(curr_char)) { sign = (curr_char == '+' ? +1 : -1); eat(); // Get next number character. } // Choose base by prefix: if (curr_char == '0') { int next = peek(); if (next == 'x' || next == 'X') { base = HEX_BASE; } else if (next == 'b' || next == 'B') { base = BIN_BASE; } else { base = OCT_BASE; } } // Re-place sign back into lexstr for strtol. if (sign == +1) { lexstr[0] = '+'; } else { lexstr[0] = '-'; } // Copy in whole num literal, keep track of columns. chars_read = fgets_base(lexstr + 1, lexfile, base); curr_col += chars_read; // Convert to integer value, using saved sign. 
lexint = sign * strtol(lexstr, NULL, base); // Check for `int` size (we can support max of 32 bits) if (lexint < INT_MIN || UINT_MAX < lexint) { jas_err("Integer larger than 32 bits.", curr_line, lo_col, curr_col); } return TOK_NUM; } // Let by various punctuation: switch (curr_char) { case ',': eat(); return TOK_COMMA; case '.': eat(); return TOK_DOT; case '+': eat(); return TOK_PLUS; case '-': eat(); return TOK_MINUS; case '[': eat(); return TOK_LBRACKET; case ']': eat(); return TOK_RBRACKET; } jas_err("Unknown character encountered.", curr_line, lo_col, lo_col); eat(); // Advance to next char. } return TOK_EOF; }