Ejemplo n.º 1
0
/* Segments the syntax in S according to MODE and appends the result to each
   of the N_OUTPUTS strings in OUTPUTS.

   Every segment other than SEG_END is copied verbatim to all of the outputs,
   except that an identifier segment for which find_dummy_var() finds a match
   in DUMMIES is replaced, in OUTPUTS[i], by that dummy variable's values[i].

   A SEG_DO_REPEAT_COMMAND segment, together with the SEG_NEWLINE and
   SEG_DO_REPEAT_COMMAND segments that directly follow it, is handed as one
   unit to a recursive call, which re-segments that text with a segmenter of
   its own. */
static void
do_parse_commands (struct substring s, enum segmenter_mode mode,
                   struct hmap *dummies,
                   struct string *outputs, size_t n_outputs)
{
  struct segmenter segmenter;

  segmenter_init (&segmenter, mode);

  while (!ss_is_empty (s))
    {
      enum segment_type type;
      int n;

      n = segmenter_push (&segmenter, s.string, s.length, &type);
      assert (n >= 0);

      if (type == SEG_DO_REPEAT_COMMAND)
        {
          /* Extend N over the whole run of SEG_DO_REPEAT_COMMAND and
             SEG_NEWLINE segments.  The segment that ends the run is not
             added to N, so the outer loop pushes it again on its next
             iteration. */
          for (;;)
            {
              int k;

              k = segmenter_push (&segmenter, s.string + n, s.length - n,
                                  &type);
              if (type != SEG_NEWLINE && type != SEG_DO_REPEAT_COMMAND)
                break;

              /* Same check as for the outer call: a negative K would move
                 N backward and corrupt the span handed to the recursive
                 call and to ss_advance() below. */
              assert (k >= 0);
              n += k;
            }

          do_parse_commands (ss_head (s, n), mode, dummies,
                             outputs, n_outputs);
        }
      else if (type != SEG_END)
        {
          const struct dummy_var *dv;
          size_t i;

          /* Only identifier segments are candidates for substitution. */
          dv = (type == SEG_IDENTIFIER
                ? find_dummy_var (dummies, s.string, n)
                : NULL);
          for (i = 0; i < n_outputs; i++)
            if (dv != NULL)
              ds_put_cstr (&outputs[i], dv->values[i]);
            else
              ds_put_substring (&outputs[i], ss_head (s, n));
        }

      ss_advance (&s, n);
    }
}
Ejemplo n.º 2
0
static void
check_segmentation (const char *input, size_t length, bool print_segments)
{
  size_t offset, line_number, line_offset;
  struct segmenter s;
  int prev_type;

  segmenter_init (&s, mode);

  line_number = 1;
  line_offset = 0;
  prev_type = -1;
  for (offset = 0; offset < length; )
    {
      enum segment_type type;
      const char *type_name, *p;
      int n;

      if (one_byte)
        {
          int n_newlines = 0;
          int i;

          for (i = 0; i <= length - offset; i++)
            {
              /* Make a copy to ensure that segmenter_push() isn't actually
                 looking ahead. */
              char *copy;

              if (i > 0 && input[offset + i - 1] == '\n')
                n_newlines++;

              copy = xmemdup (input + offset, i);
              n = segmenter_push (&s, copy, i, &type);
              free (copy);

              if (n >= 0)
                break;
            }
          assert (n_newlines <= 2);
        }
      else
        n = segmenter_push (&s, input + offset, length - offset, &type);

      if (n < 0)
        error (EXIT_FAILURE, 0, "segmenter_push returned -1 at offset %zu",
               offset);
      assert (offset + n <= length);

      if (type == SEG_NEWLINE)
        assert ((n == 1 && input[offset] == '\n')
                || (n == 2
                    && input[offset] == '\r' && input[offset + 1] == '\n'));
      else
        assert (memchr (&input[offset], '\n', n) == NULL);

      if (!print_segments)
        {
          offset += n;
          continue;
        }

      if (!verbose)
        {
          if (prev_type != SEG_SPACES && prev_type != -1
              && type == SEG_SPACES && n == 1 && input[offset] == ' ')
            {
              printf ("    space\n");
              offset++;
              prev_type = -1;
              continue;
            }
        }
      if (prev_type != -1)
        putchar ('\n');
      prev_type = type;

      if (verbose)
        printf ("%2zu:%2zu: ", line_number, offset - line_offset);

      type_name = segment_type_to_string (type);
      for (p = type_name; *p != '\0'; p++)
        putchar (tolower ((unsigned char) *p));
      if (n > 0)
        {
          int i;

          for (i = MIN (15, strlen (type_name)); i < 16; i++)
            putchar (' ');
          for (i = 0; i < n; )
            {
              const uint8_t *u_input = CHAR_CAST (const uint8_t *, input);
              ucs4_t uc;
              int mblen;

              mblen = u8_mbtoucr (&uc, u_input + (offset + i), n - i);
              if (mblen < 0)
                {
                  int j;

                  mblen = u8_mbtouc (&uc, u_input + (offset + i), n - i);
                  putchar ('<');
                  for (j = 0; j < mblen; j++)
                    {
                      if (j > 0)
                        putchar (' ');
                      printf ("%02x", input[offset + i + j]);
                    }
                  putchar ('>');
                }
              else
                {
                  switch (uc)
                    {
                    case ' ':
                      printf ("_");
                      break;

                    case '_':
                      printf ("\\_");
                      break;

                    case '\\':
                      printf ("\\\\");
                      break;

                    case '\t':
                      printf ("\\t");
                      break;

                    case '\r':
                      printf ("\\r");
                      break;

                    case '\n':
                      printf ("\\n");
                      break;

                    case '\v':
                      printf ("\\v");
                      break;

                    default:
                      if (uc < 0x20 || uc == 0x00a0)
                        printf ("<U+%04X>", uc);
                      else
                        fwrite (input + offset + i, 1, mblen, stdout);
                      break;
                    }
                }

              i += mblen;
            }
        }

      offset += n;
      if (type == SEG_NEWLINE)
        {
          enum prompt_style prompt;

          line_number++;
          line_offset = offset;

          prompt = segmenter_get_prompt (&s);
          printf (" (%s)\n", prompt_style_to_string (prompt));
        }
    }