Example #1
0
static int
phase4_getuc ()
{
  int c = phase3_getc ();

  if (c == EOF)
    return -1;
  if (c == '\\')
    {
      int c2 = phase3_getc ();

      if (c2 == 't')
	return '\t';
      if (c2 == 'n')
	return '\n';
      if (c2 == 'r')
	return '\r';
      if (c2 == 'f')
	return '\f';
      if (c2 == 'u')
	{
	  unsigned int n = 0;
	  int i;

	  for (i = 0; i < 4; i++)
	    {
	      int c1 = phase3_getc ();

	      if (c1 >= '0' && c1 <= '9')
		n = (n << 4) + (c1 - '0');
	      else if (c1 >= 'A' && c1 <= 'F')
		n = (n << 4) + (c1 - 'A' + 10);
	      else if (c1 >= 'a' && c1 <= 'f')
		n = (n << 4) + (c1 - 'a' + 10);
	      else
		{
		  phase3_ungetc (c1);
		  error_with_progname = false;
		  error (0, 0, _("%s:%lu: warning: invalid \\uxxxx syntax for Unicode character"),
			 real_file_name, (unsigned long) gram_pos.line_number);
		  error_with_progname = true;
		  return 'u';
		}
	    }
	  return n;
	}

      return c2;
    }
  else
    return c;
}
Example #2
0
File: x-c.c Project: alan707/senuti
static void
phase7_ungetc (int c)
{
  phase3_ungetc (c);
}
Example #3
0
File: x-c.c Project: alan707/senuti
static int
phase7_getc ()
{
  int c, n, j;

  /* Use phase 3, because phase 4 elides comments.  */
  c = phase3_getc ();

  /* Return a magic newline indicator, so that we can distinguish
     between the user requesting a newline in the string (e.g. using
     "\n" or "\012") from the user failing to terminate the string or
     character constant.  The ANSI C standard says: 3.1.3.4 Character
     Constants contain ``any character except single quote, backslash or
     newline; or an escape sequence'' and 3.1.4 String Literals contain
     ``any character except double quote, backslash or newline; or an
     escape sequence''.

     Most compilers give a fatal error in this case, however gcc is
     stupidly silent, even though this is a very common typo.  OK, so
     gcc --pedantic will tell me, but that gripes about too much other
     stuff.  Could I have a ``gcc -Wnewline-in-string'' option, or
     better yet a ``gcc -fno-newline-in-string'' option, please?  Gcc is
     also inconsistent between string literals and character constants:
     you may not embed newlines in character constants; try it, you get
     a useful diagnostic.  --PMiller  */
  if (c == '\n')
    return P7_NEWLINE;

  if (c == '"')
    return P7_QUOTES;
  if (c == '\'')
    return P7_QUOTE;
  if (c != '\\')
    return c;
  c = phase3_getc ();
  switch (c)
    {
    default:
      /* Unknown escape sequences really should be an error, but just
	 ignore them, and let the real compiler complain.  */
      phase3_ungetc (c);
      return '\\';

    case '"':
    case '\'':
    case '?':
    case '\\':
      return c;

    case 'a':
      return '\a';
    case 'b':
      return '\b';

      /* The \e escape is preculiar to gcc, and assumes an ASCII
	 character set (or superset).  We don't provide support for it
	 here.  */

    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case 'v':
      return '\v';

    case 'x':
      c = phase3_getc ();
      switch (c)
	{
	default:
	  phase3_ungetc (c);
	  phase3_ungetc ('x');
	  return '\\';

	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	  break;
	}
      n = 0;
      for (;;)
	{
	  switch (c)
	    {
	    default:
	      phase3_ungetc (c);
	      return n;

	    case '0': case '1': case '2': case '3': case '4':
	    case '5': case '6': case '7': case '8': case '9':
	      n = n * 16 + c - '0';
	      break;

	    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
	      n = n * 16 + 10 + c - 'A';
	      break;

	    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	      n = n * 16 + 10 + c - 'a';
	      break;
	    }
	  c = phase3_getc ();
	}
      return n;

    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
      n = 0;
      for (j = 0; j < 3; ++j)
	{
	  n = n * 8 + c - '0';
	  c = phase3_getc ();
	  switch (c)
	    {
	    default:
	      break;

	    case '0': case '1': case '2': case '3':
	    case '4': case '5': case '6': case '7':
	      continue;
	    }
	  break;
	}
      phase3_ungetc (c);
      return n;
    }
}
Example #4
0
File: x-c.c Project: alan707/senuti
static int
phase4_getc ()
{
  int c;
  bool last_was_star;

  c = phase3_getc ();
  if (c != '/')
    return c;
  c = phase3_getc ();
  switch (c)
    {
    default:
      phase3_ungetc (c);
      return '/';

    case '*':
      /* C comment.  */
      comment_start ();
      last_was_star = false;
      for (;;)
	{
	  c = phase3_getc ();
	  if (c == EOF)
	    break;
	  /* We skip all leading white space, but not EOLs.  */
	  if (!(buflen == 0 && (c == ' ' || c == '\t')))
	    comment_add (c);
	  switch (c)
	    {
	    case '\n':
	      comment_line_end (1);
	      comment_start ();
	      last_was_star = false;
	      continue;

	    case '*':
	      last_was_star = true;
	      continue;

	    case '/':
	      if (last_was_star)
		{
		  comment_line_end (2);
		  break;
		}
	      /* FALLTHROUGH */

	    default:
	      last_was_star = false;
	      continue;
	    }
	  break;
	}
      last_comment_line = newline_count;
      return ' ';

    case '/':
      /* C++ or ISO C 99 comment.  */
      comment_start ();
      for (;;)
	{
	  c = phase3_getc ();
	  if (c == '\n' || c == EOF)
	    break;
	  /* We skip all leading white space, but not EOLs.  */
	  if (!(buflen == 0 && (c == ' ' || c == '\t')))
	    comment_add (c);
	}
      comment_line_end (0);
      last_comment_line = newline_count;
      return '\n';
    }
}
Example #5
0
static char *
read_escaped_string (bool in_key)
{
  static unsigned short *buffer;
  static size_t bufmax;
  static size_t buflen;
  int c;

  /* Skip whitespace before the string.  */
  do
    c = phase3_getc ();
  while (c == ' ' || c == '\t' || c == '\r' || c == '\f');

  if (c == EOF || c == '\n')
    /* Empty string.  */
    return NULL;

  /* Start accumulating the string.  We store the string in UTF-16 before
     converting it to UTF-8.  Why not converting every character directly to
     UTF-8? Because a string can contain surrogates like \uD800\uDF00, and
     we must combine them to a single UTF-8 character.  */
  buflen = 0;
  for (;;)
    {
      if (in_key && (c == '=' || c == ':'
		     || c == ' ' || c == '\t' || c == '\r' || c == '\f'))
	{
	  /* Skip whitespace after the string.  */
	  while (c == ' ' || c == '\t' || c == '\r' || c == '\f')
	    c = phase3_getc ();
	  /* Skip '=' or ':' separator.  */
	  if (!(c == '=' || c == ':'))
	    phase3_ungetc (c);
	  break;
	}

      phase3_ungetc (c);

      /* Read the next UTF-16 codepoint.  */
      c = phase4_getuc ();
      if (c < 0)
	break;
      /* Append it to the buffer.  */
      if (buflen >= bufmax)
	{
	  bufmax += 100;
	  buffer = xrealloc (buffer, bufmax * sizeof (unsigned short));
	}
      buffer[buflen++] = c;

      c = phase3_getc ();
      if (c == EOF || c == '\n')
	{
	  if (in_key)
	    phase3_ungetc (c);
	  break;
	}
    }

  /* Now convert from UTF-16 to UTF-8.  */
  {
    size_t pos;
    unsigned char *utf8_string;
    unsigned char *q;

    /* Each UTF-16 word needs 3 bytes at worst.  */
    utf8_string = (unsigned char *) xmalloc (3 * buflen + 1);
    for (pos = 0, q = utf8_string; pos < buflen; )
      {
	unsigned int uc;
	int n;

	pos += u16_mbtouc (&uc, buffer + pos, buflen - pos);
	n = u8_uctomb (q, uc, 6);
	assert (n > 0);
	q += n;
      }
    *q = '\0';
    assert (q - utf8_string <= 3 * buflen);

    return (char *) utf8_string;
  }
}