static void do_parse_commands (struct substring s, enum segmenter_mode mode, struct hmap *dummies, struct string *outputs, size_t n_outputs) { struct segmenter segmenter; segmenter_init (&segmenter, mode); while (!ss_is_empty (s)) { enum segment_type type; int n; n = segmenter_push (&segmenter, s.string, s.length, &type); assert (n >= 0); if (type == SEG_DO_REPEAT_COMMAND) { for (;;) { int k; k = segmenter_push (&segmenter, s.string + n, s.length - n, &type); if (type != SEG_NEWLINE && type != SEG_DO_REPEAT_COMMAND) break; n += k; } do_parse_commands (ss_head (s, n), mode, dummies, outputs, n_outputs); } else if (type != SEG_END) { const struct dummy_var *dv; size_t i; dv = (type == SEG_IDENTIFIER ? find_dummy_var (dummies, s.string, n) : NULL); for (i = 0; i < n_outputs; i++) if (dv != NULL) ds_put_cstr (&outputs[i], dv->values[i]); else ds_put_substring (&outputs[i], ss_head (s, n)); } ss_advance (&s, n); } }
static void check_segmentation (const char *input, size_t length, bool print_segments) { size_t offset, line_number, line_offset; struct segmenter s; int prev_type; segmenter_init (&s, mode); line_number = 1; line_offset = 0; prev_type = -1; for (offset = 0; offset < length; ) { enum segment_type type; const char *type_name, *p; int n; if (one_byte) { int n_newlines = 0; int i; for (i = 0; i <= length - offset; i++) { /* Make a copy to ensure that segmenter_push() isn't actually looking ahead. */ char *copy; if (i > 0 && input[offset + i - 1] == '\n') n_newlines++; copy = xmemdup (input + offset, i); n = segmenter_push (&s, copy, i, &type); free (copy); if (n >= 0) break; } assert (n_newlines <= 2); } else n = segmenter_push (&s, input + offset, length - offset, &type); if (n < 0) error (EXIT_FAILURE, 0, "segmenter_push returned -1 at offset %zu", offset); assert (offset + n <= length); if (type == SEG_NEWLINE) assert ((n == 1 && input[offset] == '\n') || (n == 2 && input[offset] == '\r' && input[offset + 1] == '\n')); else assert (memchr (&input[offset], '\n', n) == NULL); if (!print_segments) { offset += n; continue; } if (!verbose) { if (prev_type != SEG_SPACES && prev_type != -1 && type == SEG_SPACES && n == 1 && input[offset] == ' ') { printf (" space\n"); offset++; prev_type = -1; continue; } } if (prev_type != -1) putchar ('\n'); prev_type = type; if (verbose) printf ("%2zu:%2zu: ", line_number, offset - line_offset); type_name = segment_type_to_string (type); for (p = type_name; *p != '\0'; p++) putchar (tolower ((unsigned char) *p)); if (n > 0) { int i; for (i = MIN (15, strlen (type_name)); i < 16; i++) putchar (' '); for (i = 0; i < n; ) { const uint8_t *u_input = CHAR_CAST (const uint8_t *, input); ucs4_t uc; int mblen; mblen = u8_mbtoucr (&uc, u_input + (offset + i), n - i); if (mblen < 0) { int j; mblen = u8_mbtouc (&uc, u_input + (offset + i), n - i); putchar ('<'); for (j = 0; j < mblen; j++) { if (j > 0) putchar (' '); printf ("%02x", input[offset + i + j]); } putchar ('>'); } else { switch (uc) { case ' ': printf ("_"); break; case '_': printf ("\\_"); break; case '\\': printf ("\\\\"); break; case '\t': printf ("\\t"); break; case '\r': printf ("\\r"); break; case '\n': printf ("\\n"); break; case '\v': printf ("\\v"); break; default: if (uc < 0x20 || uc == 0x00a0) printf ("<U+%04X>", uc); else fwrite (input + offset + i, 1, mblen, stdout); break; } } i += mblen; } } offset += n; if (type == SEG_NEWLINE) { enum prompt_style prompt; line_number++; line_offset = offset; prompt = segmenter_get_prompt (&s); printf (" (%s)\n", prompt_style_to_string (prompt)); } }