Example #1
0
/* Read and record the parameters, if any, of a function-like macro
   definition.  Destroys pfile->out.cur.

   Returns true on success, false on failure (syntax error or a
   duplicate parameter).  On success, CUR (pfile->context) is just
   past the closing parenthesis.  */
static bool
scan_parameters (cpp_reader *pfile, cpp_macro *macro)
{
  const uchar *cur = CUR (pfile->context) + 1;
  bool ok;

  for (;;)
    {
      cur = skip_whitespace (pfile, cur, true /* skip_comments */);

      if (is_idstart (*cur))
	{
	  ok = false;
	  if (_cpp_save_parameter (pfile, macro, lex_identifier (pfile, cur)))
	    break;
	  cur = skip_whitespace (pfile, CUR (pfile->context),
				 true /* skip_comments */);
	  if (*cur == ',')
	    {
	      cur++;
	      continue;
	    }
	  ok = (*cur == ')');
	  break;
	}

      ok = (*cur == ')' && macro->paramc == 0);
      break;
    }

  if (!ok)
    cpp_error (pfile, CPP_DL_ERROR, "syntax error in macro parameter list");

  CUR (pfile->context) = cur + (*cur == ')');

  return ok;
}
Example #2
0
/* Copies the next logical line in the current buffer (starting at
   buffer->cur) to the output buffer.  The output is guaranteed to
   terminate with a NUL character.  buffer->cur is updated.

   If MACRO is non-NULL, then we are scanning the replacement list of
   MACRO, and we call save_replacement_text() every time we meet an
   argument.  */
bool
_cpp_scan_out_logical_line (cpp_reader *pfile, cpp_macro *macro)
{
  bool result = true;
  cpp_context *context;
  const uchar *cur;
  uchar *out;
  struct fun_macro fmacro;
  unsigned int c, paren_depth = 0, quote;
  enum ls lex_state = ls_none;
  bool header_ok;
  const uchar *start_of_input_line;

  fmacro.buff = NULL;

  quote = 0;
  header_ok = pfile->state.angled_headers;
  CUR (pfile->context) = pfile->buffer->cur;
  RLIMIT (pfile->context) = pfile->buffer->rlimit;
  pfile->out.cur = pfile->out.base;
  pfile->out.first_line = pfile->line;
  /* start_of_input_line is needed to make sure that directives really,
     really start at the first character of the line.  */
  start_of_input_line = pfile->buffer->cur;
 new_context:
  context = pfile->context;
  cur = CUR (context);
  check_output_buffer (pfile, RLIMIT (context) - cur);
  out = pfile->out.cur;

  for (;;)
    {
      if (!context->prev
	  && cur >= pfile->buffer->notes[pfile->buffer->cur_note].pos)
	{
	  pfile->buffer->cur = cur;
	  _cpp_process_line_notes (pfile, false);
	}
      c = *cur++;
      *out++ = c;

      /* Whitespace should "continue" out of the switch,
	 non-whitespace should "break" out of it.  */
      switch (c)
	{
	case ' ':
	case '\t':
	case '\f':
	case '\v':
	case '\0':
	  continue;

	case '\n':
	  /* If this is a macro's expansion, pop it.  */
	  if (context->prev)
	    {
	      pfile->out.cur = out - 1;
	      _cpp_pop_context (pfile);
	      goto new_context;
	    }

	  /* Omit the newline from the output buffer.  */
	  pfile->out.cur = out - 1;
	  pfile->buffer->cur = cur;
	  pfile->buffer->need_line = true;
	  pfile->line++;

	  if ((lex_state == ls_fun_open || lex_state == ls_fun_close)
	      && !pfile->state.in_directive
	      && _cpp_get_fresh_line (pfile))
	    {
	      /* Newlines in arguments become a space, but we don't
		 clear any in-progress quote.  */
	      if (lex_state == ls_fun_close)
		out[-1] = ' ';
	      cur = pfile->buffer->cur;
	      continue;
	    }
	  goto done;

	case '<':
	  if (header_ok)
	    quote = '>';
	  break;
	case '>':
	  if (c == quote)
	    quote = 0;
	  break;

	case '"':
	case '\'':
	  if (c == quote)
	    quote = 0;
	  else if (!quote)
	    quote = c;
	  break;

	case '\\':
	  /* Skip escaped quotes here, it's easier than above.  */
	  if (*cur == '\\' || *cur == '"' || *cur == '\'')
	    *out++ = *cur++;
	  break;

	case '/':
	  /* Traditional CPP does not recognize comments within
	     literals.  */
	  if (!quote && *cur == '*')
	    {
	      pfile->out.cur = out;
	      cur = copy_comment (pfile, cur, macro != 0);
	      out = pfile->out.cur;
	      continue;
	    }
	  break;

	case '_':
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
	case 'y': case 'z':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
	case 'Y': case 'Z':
	  if (!pfile->state.skipping && (quote == 0 || macro))
	    {
	      cpp_hashnode *node;
	      uchar *out_start = out - 1;

	      pfile->out.cur = out_start;
	      node = lex_identifier (pfile, cur - 1);
	      out = pfile->out.cur;
	      cur = CUR (context);

	      if (node->type == NT_MACRO
		  /* Should we expand for ls_answer?  */
		  && (lex_state == ls_none || lex_state == ls_fun_open)
		  && !pfile->state.prevent_expansion)
		{
		  /* Macros invalidate MI optimization.  */
		  pfile->mi_valid = false;
		  if (! (node->flags & NODE_BUILTIN)
		      && node->value.macro->fun_like)
		    {
		      maybe_start_funlike (pfile, node, out_start, &fmacro);
		      lex_state = ls_fun_open;
		      fmacro.line = pfile->line;
		      continue;
		    }
		  else if (!recursive_macro (pfile, node))
		    {
		      /* Remove the object-like macro's name from the
			 output, and push its replacement text.  */
		      pfile->out.cur = out_start;
		      push_replacement_text (pfile, node);
		      lex_state = ls_none;
		      goto new_context;
		    }
		}
	      else if (macro && (node->flags & NODE_MACRO_ARG) != 0)
		{
		  /* Found a parameter in the replacement text of a
		     #define.  Remove its name from the output.  */
		  pfile->out.cur = out_start;
		  save_replacement_text (pfile, macro, node->value.arg_index);
		  out = pfile->out.base;
		}
	      else if (lex_state == ls_hash)
		{
		  lex_state = ls_predicate;
		  continue;
		}
	      else if (pfile->state.in_expression
		       && node == pfile->spec_nodes.n_defined)
		{
		  lex_state = ls_defined;
		  continue;
		}
	    }
	  break;

	case '(':
	  if (quote == 0)
	    {
	      paren_depth++;
	      if (lex_state == ls_fun_open)
		{
		  if (recursive_macro (pfile, fmacro.node))
		    lex_state = ls_none;
		  else
		    {
		      lex_state = ls_fun_close;
		      paren_depth = 1;
		      out = pfile->out.base + fmacro.offset;
		      fmacro.args[0] = fmacro.offset;
		    }
		}
	      else if (lex_state == ls_predicate)
		lex_state = ls_answer;
	      else if (lex_state == ls_defined)
		lex_state = ls_defined_close;
	    }
	  break;

	case ',':
	  if (quote == 0 && lex_state == ls_fun_close && paren_depth == 1)
	    save_argument (&fmacro, out - pfile->out.base);
	  break;

	case ')':
	  if (quote == 0)
	    {
	      paren_depth--;
	      if (lex_state == ls_fun_close && paren_depth == 0)
		{
		  cpp_macro *m = fmacro.node->value.macro;

		  m->used = 1;
		  lex_state = ls_none;
		  save_argument (&fmacro, out - pfile->out.base);

		  /* A single zero-length argument is no argument.  */
		  if (fmacro.argc == 1
		      && m->paramc == 0
		      && out == pfile->out.base + fmacro.offset + 1)
		    fmacro.argc = 0;

		  if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
		    {
		      /* Remove the macro's invocation from the
			 output, and push its replacement text.  */
		      pfile->out.cur = (pfile->out.base
					     + fmacro.offset);
		      CUR (context) = cur;
		      replace_args_and_push (pfile, &fmacro);
		      goto new_context;
		    }
		}
	      else if (lex_state == ls_answer || lex_state == ls_defined_close)
		lex_state = ls_none;
	    }
	  break;

	case '#':
	  if (cur - 1 == start_of_input_line
	      /* A '#' from a macro doesn't start a directive.  */
	      && !pfile->context->prev
	      && !pfile->state.in_directive)
	    {
	      /* A directive.  With the way _cpp_handle_directive
		 currently works, we only want to call it if either we
		 know the directive is OK, or we want it to fail and
		 be removed from the output.  If we want it to be
		 passed through (the assembler case) then we must not
		 call _cpp_handle_directive.  */
	      pfile->out.cur = out;
	      cur = skip_whitespace (pfile, cur, true /* skip_comments */);
	      out = pfile->out.cur;

	      if (*cur == '\n')
		{
		  /* Null directive.  Ignore it and don't invalidate
		     the MI optimization.  */
		  pfile->buffer->need_line = true;
		  pfile->line++;
		  result = false;
		  goto done;
		}
	      else
		{
		  bool do_it = false;

		  if (is_numstart (*cur)
		      && CPP_OPTION (pfile, lang) != CLK_ASM)
		    do_it = true;
		  else if (is_idstart (*cur))
		    /* Check whether we know this directive, but don't
		       advance.  */
		    do_it = lex_identifier (pfile, cur)->is_directive;

		  if (do_it || CPP_OPTION (pfile, lang) != CLK_ASM)
		    {
		      /* This is a kludge.  We want to have the ISO
			 preprocessor lex the next token.  */
		      pfile->buffer->cur = cur;
		      _cpp_handle_directive (pfile, false /* indented */);
		      result = false;
		      goto done;
		    }
		}
	    }

	  if (pfile->state.in_expression)
	    {
	      lex_state = ls_hash;
	      continue;
	    }
	  break;

	default:
	  break;
	}

      /* Non-whitespace disables MI optimization and stops treating
	 '<' as a quote in #include.  */
      header_ok = false;
      if (!pfile->state.in_directive)
	pfile->mi_valid = false;

      if (lex_state == ls_none)
	continue;

      /* Some of these transitions of state are syntax errors.  The
	 ISO preprocessor will issue errors later.  */
      if (lex_state == ls_fun_open)
	/* Missing '('.  */
	lex_state = ls_none;
      else if (lex_state == ls_hash
	       || lex_state == ls_predicate
	       || lex_state == ls_defined)
	lex_state = ls_none;

      /* ls_answer and ls_defined_close keep going until ')'.  */
    }

 done:
  if (fmacro.buff)
    _cpp_release_buff (pfile, fmacro.buff);

  if (lex_state == ls_fun_close)
    cpp_error_with_line (pfile, CPP_DL_ERROR, fmacro.line, 0,
			 "unterminated argument list invoking macro \"%s\"",
			 NODE_NAME (fmacro.node));
  return result;
}
Example #3
0
/*
 * Gets the next token from the `lexfile` FILE stream.
 * Side effects:
 *  - If the TokenType has an associated string, it is found in global `lexstr`.
 *  - If the TokenType has an associated integer value, look in global `lexint`.
 */
TokenType next_tok(void) {
    if (!curr_char) eat(); // Eat first char.

    while (curr_char != EOF) {
        lo_col = curr_col; // Save first col of the token.

        // Newline.
        if (curr_char == '\n') {
            eat();
            return TOK_NL;
        }

        // Skip whitespace.
        if (isspace(curr_char)) {
            eat();
            continue;
        }

        // Skip comments until next line.
        if (curr_char == ';') {
            do {
                eat();
            } while (curr_char != '\n' && curr_char != EOF);
            continue;
        }

        // id ::= [A-Za-z$_][A-Za-z_$0-9]*
        // reg ::= r([0-9]|1[0-5])[abcd] | rs | re[0-6] | rk[0-7]
        // label ::= <nonopcode id>:
        if (is_idstart(curr_char)) {
            int i;
            // TODO: Make this loop prettier.
            for (i = 0; curr_char != EOF; i++) {
                lexstr[i] = curr_char;
                if (is_idcont(peek())) {
                    eat();
                } else {
                    break;
                }
            }
            lexstr[++i] = '\0';
            eat(); // Advance to next char after the identifier.

            // Register?
            if (lexstr[0] == 'r') {

                // Long or short
                if (is_long_reg(lexstr))
                    return TOK_GL_REG;
                if (is_short_reg(lexstr))
                    return TOK_GS_REG;

                // Extra
                if (is_extra_reg(lexstr))
                    return TOK_E_REG;

                // Kernel
                if (is_kernel_reg(lexstr))
                    return TOK_K_REG;
            }

            // Directive?
            if (is_dtv(lexstr)) {
                return TOK_DATA_SEG;
            }

            // Instruction?
            if (isInstruction(lexstr))
                return TOK_INSTR;

            // Label?
            if (curr_char == ':') {
                eat(); // Eat the ':'
                return TOK_LABEL;
            }

            // Plain identfier
            return TOK_ID;
        }

        // chr_lit ::= '[^\\']'
        if (curr_char == '\'') {
            eat(); // Get inner char.
            if (curr_char == '\\') {
                lexstr[0] = escape(eat());
            } else {
                lexstr[0] = curr_char;
            }
            lexstr[1] = '\0';
            eat(); // Reach the closing quote.

            // Error: for situations like '\'
            if (curr_char != '\'') {
                jas_err("Character literal missing closing quote.",
                         curr_line, lo_col, curr_col);
                return TOK_UNK;
            }

            eat(); // Get rid of ' and advance.
            return TOK_CHR_LIT;
        }

        // str_lit ::= "(\\.|[^\\"])*"
        if (curr_char == '"') {
            eat(); // Get first char of string.

            // Let by escape chars, but not single \ or ".
            int i;
            for (i = 0; curr_char != '"'; i++) {
                // Check that we don't close reach EOF before the close ".
                if (curr_char == EOF) {
                    jas_err("EOF while parsing string literal.",
                            curr_line, curr_col, curr_col);
                    return TOK_UNK;
                }

                if (curr_char == '\\') {
                    lexstr[i] = escape(eat());
                } else {
                    lexstr[i] = curr_char;
                }
                eat();
            }
            lexstr[i] = '\0';

            eat(); // Get rid of the " and advance.
            return TOK_STR_LIT;
        }

        // num_lit ::= [+-][1-9][0-9]* | [+-]0[0-7]* | [+-]0x[0-9A-Fa-f]+
        //          |  [+-]0b[01]+
        if ((issign(curr_char) && isdigit(peek())) || isdigit(curr_char)) {
            int base = 10;  // Numeric base for interpreting the literal.
            int chars_read; // Keep track of how many columns we move forward.
            int sign = +1;

            // Grab sign if it exists.
            if (issign(curr_char)) {
                sign = (curr_char == '+' ? +1 : -1);
                eat(); // Get next number character.
            }

            // Choose base by prefix:
            if (curr_char == '0') {
                int next = peek();
                if (next == 'x' || next == 'X') {
                    base = HEX_BASE;
                } else if (next == 'b' || next == 'B') {
                    base = BIN_BASE;
                } else {
                    base = OCT_BASE;
                }
            }

            // Re-place sign back into lexstr for strtol.
            if (sign == +1) {
                lexstr[0] = '+';
            } else {
                lexstr[0] = '-';
            }

            // Copy in whole num literal, keep track of columns.
            chars_read = fgets_base(lexstr + 1, lexfile, base);
            curr_col += chars_read;

            // Convert to integer value, using saved sign.
            lexint = sign * strtol(lexstr, NULL, base);

            // Check for `int` size (we can support max of 32 bits)
            if (lexint < INT_MIN || UINT_MAX < lexint) {
                jas_err("Integer larger than 32 bits.",
                        curr_line, lo_col, curr_col);
            }

            return TOK_NUM;
        }

        // Let by various punctuation:
        switch (curr_char) {
            case ',': eat(); return TOK_COMMA;
            case '.': eat(); return TOK_DOT;
            case '+': eat(); return TOK_PLUS;
            case '-': eat(); return TOK_MINUS;
            case '[': eat(); return TOK_LBRACKET;
            case ']': eat(); return TOK_RBRACKET;
        }

        jas_err("Unknown character encountered.", curr_line, lo_col, lo_col);
        eat(); // Advance to next char.
    }

    return TOK_EOF;
}