Beispiel #1
0
void
Pcompile (char const *pattern, size_t size)
{
#if !HAVE_LIBPCRE
  error (EXIT_TROUBLE, 0, "%s",
         _("support for the -P option is not compiled into "
           "this --disable-perl-regexp binary"));
#else
  int e;
  char const *ep;
  char *re = xnmalloc (4, size + 7);
  int flags = (PCRE_MULTILINE
               | (match_icase ? PCRE_CASELESS : 0));
  char const *patlim = pattern + size;
  char *n = re;
  char const *p;
  char const *pnul;

  if (using_utf8 ())
    flags |= PCRE_UTF8;
  else if (MB_CUR_MAX != 1)
    error (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales"));

  /* FIXME: Remove these restrictions.  */
  if (memchr (pattern, '\n', size))
    error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));

  *n = '\0';
  if (match_lines)
    strcpy (n, "^(?:");
  if (match_words)
    strcpy (n, "(?<!\\w)(?:");
  n += strlen (n);

  /* The PCRE interface doesn't allow NUL bytes in the pattern, so
     replace each NUL byte in the pattern with the four characters
     "\000", removing a preceding backslash if there are an odd
     number of backslashes before the NUL.

     FIXME: This method does not work with some multibyte character
     encodings, notably Shift-JIS, where a multibyte character can end
     in a backslash byte.  */
  for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
    {
      memcpy (n, p, pnul - p);
      n += pnul - p;
      for (p = pnul; pattern < p && p[-1] == '\\'; p--)
        continue;
      n -= (pnul - p) & 1;
      strcpy (n, "\\000");
      n += 4;
    }

  memcpy (n, p, patlim - p);
  n += patlim - p;
  *n = '\0';
  if (match_words)
    strcpy (n, ")(?!\\w)");
  if (match_lines)
    strcpy (n, ")$");

  cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
  if (!cre)
    error (EXIT_TROUBLE, 0, "%s", ep);

  extra = pcre_study (cre, PCRE_STUDY_JIT_COMPILE, &ep);
  if (ep)
    error (EXIT_TROUBLE, 0, "%s", ep);

# if PCRE_STUDY_JIT_COMPILE
  if (pcre_fullinfo (cre, extra, PCRE_INFO_JIT, &e))
    error (EXIT_TROUBLE, 0, _("internal error (should never happen)"));

  /* The PCRE documentation says that a 32 KiB stack is the default.  */
  if (e)
    jit_stack_size = 32 << 10;
# endif

  free (re);

  int sub[NSUB];
  empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
                                  PCRE_NOTBOL, sub, NSUB);
  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
#endif /* HAVE_LIBPCRE */
}
void
Pcompile (char const *pattern, size_t size)
{
#if !HAVE_LIBPCRE
  error (EXIT_TROUBLE, 0, "%s",
         _("support for the -P option is not compiled into "
           "this --disable-perl-regexp binary"));
#else
  int e;
  char const *ep;
  char *re = xnmalloc (4, size + 7);
  int flags = (PCRE_MULTILINE
               | (match_icase ? PCRE_CASELESS : 0)
               | (using_utf8 () ? PCRE_UTF8 : 0));
  char const *patlim = pattern + size;
  char *n = re;
  char const *p;
  char const *pnul;

  /* FIXME: Remove these restrictions.  */
  if (memchr (pattern, '\n', size))
    error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));

  *n = '\0';
  if (match_lines)
    strcpy (n, "^(?:");
  if (match_words)
    strcpy (n, "(?<!\\w)(?:");
  n += strlen (n);

  /* The PCRE interface doesn't allow NUL bytes in the pattern, so
     replace each NUL byte in the pattern with the four characters
     "\000", removing a preceding backslash if there are an odd
     number of backslashes before the NUL.

     FIXME: This method does not work with some multibyte character
     encodings, notably Shift-JIS, where a multibyte character can end
     in a backslash byte.  */
  for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
    {
      memcpy (n, p, pnul - p);
      n += pnul - p;
      for (p = pnul; pattern < p && p[-1] == '\\'; p--)
        continue;
      n -= (pnul - p) & 1;
      strcpy (n, "\\000");
      n += 4;
    }

  memcpy (n, p, patlim - p);
  n += patlim - p;
  *n = '\0';
  if (match_words)
    strcpy (n, ")(?!\\w)");
  if (match_lines)
    strcpy (n, ")$");

  cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
  if (!cre)
    error (EXIT_TROUBLE, 0, "%s", ep);

  extra = pcre_study (cre, PCRE_STUDY_JIT_COMPILE, &ep);
  if (ep)
    error (EXIT_TROUBLE, 0, "%s", ep);

# if PCRE_STUDY_JIT_COMPILE
  if (pcre_fullinfo (cre, extra, PCRE_INFO_JIT, &e))
    error (EXIT_TROUBLE, 0, _("internal error (should never happen)"));

  if (e)
    {
      /* A 32K stack is allocated for the machine code by default, which
         can grow to 512K if necessary. Since JIT uses far less memory
         than the interpreter, this should be enough in practice.  */
      jit_stack = pcre_jit_stack_alloc (32 * 1024, 512 * 1024);
      if (!jit_stack)
        error (EXIT_TROUBLE, 0,
               _("failed to allocate memory for the PCRE JIT stack"));
      pcre_assign_jit_stack (extra, NULL, jit_stack);
    }
# endif
  free (re);
#endif /* HAVE_LIBPCRE */
}
Beispiel #3
0
char *display_string(const char *buf, size_t start_col, size_t len, bool dollars)
{
	size_t start_index;
	/* Index in buf of the first character shown. */
	size_t column;
	/* Screen column that start_index corresponds to. */
	size_t alloc_len;
	/* The length of memory allocated for converted. */
	char *converted;
	/* The string we return. */
	size_t index;
	/* Current position in converted. */
	char *buf_mb;
	int buf_mb_len;

	/* If dollars is true, make room for the "$" at the end of the
	 * line. */
	if (dollars && len > 0 && strlenpt(buf) > start_col + len) {
		len--;
	}

	if (len == 0) {
		return mallocstrcpy(NULL, "");
	}

	buf_mb = charalloc(mb_cur_max());

	start_index = actual_x(buf, start_col);
	column = strnlenpt(buf, start_index);

	assert(column <= start_col);

	/* Make sure there's enough room for the initial character, whether
	 * it's a multibyte control character, a non-control multibyte
	 * character, a tab character, or a null terminator.  Rationale:
	 *
	 * multibyte control character followed by a null terminator:
	 *     1 byte ('^') + mb_cur_max() bytes + 1 byte ('\0')
	 * multibyte non-control character followed by a null terminator:
	 *     mb_cur_max() bytes + 1 byte ('\0')
	 * tab character followed by a null terminator:
	 *     mb_cur_max() bytes + (tabsize - 1) bytes + 1 byte ('\0')
	 *
	 * Since tabsize has a minimum value of 1, it can substitute for 1
	 * byte above. */
	alloc_len = (mb_cur_max() + tabsize + 1) * MAX_BUF_SIZE;
	converted = charalloc(alloc_len);

	index = 0;

	if (buf[start_index] != '\0' && buf[start_index] != '\t' && (column < start_col || (dollars && column > 0))) {
		/* We don't display all of buf[start_index] since it starts to
		 * the left of the screen. */
		buf_mb_len = parse_mbchar(buf + start_index, buf_mb, NULL);

		if (is_cntrl_mbchar(buf_mb)) {
			if (column < start_col) {
				char *ctrl_buf_mb = charalloc(mb_cur_max());
				int ctrl_buf_mb_len, i;

				ctrl_buf_mb = control_mbrep(buf_mb, ctrl_buf_mb, &ctrl_buf_mb_len);

				for (i = 0; i < ctrl_buf_mb_len; i++) {
					converted[index++] = ctrl_buf_mb[i];
				}

				start_col += mbwidth(ctrl_buf_mb);

				free(ctrl_buf_mb);

				start_index += buf_mb_len;
			}
		} else if (using_utf8() && mbwidth(buf_mb) == 2) {
			if (column >= start_col) {
				converted[index++] = ' ';
				start_col++;
			}

			converted[index++] = ' ';
			start_col++;

			start_index += buf_mb_len;
		}
	}

	while (buf[start_index] != '\0') {
		buf_mb_len = parse_mbchar(buf + start_index, buf_mb, NULL);

		/* Make sure there's enough room for the next character, whether
		 * it's a multibyte control character, a non-control multibyte
		 * character, a tab character, or a null terminator. */
		if (index + mb_cur_max() + tabsize + 1 >= alloc_len - 1) {
			alloc_len += (mb_cur_max() + tabsize + 1) * MAX_BUF_SIZE;
			converted = charealloc(converted, alloc_len);
		}

		/* If buf contains a tab character, interpret it. */
		if (*buf_mb == '\t') {
			if (ISSET(WHITESPACE_DISPLAY)) {
				int i;

				for (i = 0; i < whitespace_len[0]; i++) {
					converted[index++] = whitespace[i];
				}
			} else {
				converted[index++] = ' ';
			}
			start_col++;
			while (start_col % tabsize != 0) {
				converted[index++] = ' ';
				start_col++;
			}
		} else if (is_cntrl_mbchar(buf_mb)) {
			/* If buf contains a control character, interpret it. */
			char *ctrl_buf_mb = charalloc(mb_cur_max());
			int ctrl_buf_mb_len, i;

			converted[index++] = '^';
			start_col++;

			ctrl_buf_mb = control_mbrep(buf_mb, ctrl_buf_mb, &ctrl_buf_mb_len);

			for (i = 0; i < ctrl_buf_mb_len; i++) {
				converted[index++] = ctrl_buf_mb[i];
			}

			start_col += mbwidth(ctrl_buf_mb);

			free(ctrl_buf_mb);
			/* If buf contains a space character, interpret it. */
		} else if (*buf_mb == ' ') {
			if (ISSET(WHITESPACE_DISPLAY)) {
				int i;

				for (i = whitespace_len[0]; i < whitespace_len[0] + whitespace_len[1]; i++) {
					converted[index++] = whitespace[i];
				}
			} else {
				converted[index++] = ' ';
			}
			start_col++;
		} else {
			/* If buf contains a non-control character, interpret it.  If buf
			 * contains an invalid multibyte sequence, display it as such. */
			char *nctrl_buf_mb = charalloc(mb_cur_max());
			int nctrl_buf_mb_len, i;

			/* Make sure an invalid sequence-starter byte is properly
			 * terminated, so that it doesn't pick up lingering bytes
			 * of any previous content. */
			null_at(&buf_mb, buf_mb_len);

			nctrl_buf_mb = mbrep(buf_mb, nctrl_buf_mb, &nctrl_buf_mb_len);

			for (i = 0; i < nctrl_buf_mb_len; i++) {
				converted[index++] = nctrl_buf_mb[i];
			}

			start_col += mbwidth(nctrl_buf_mb);

			free(nctrl_buf_mb);
		}

		start_index += buf_mb_len;
	}

	free(buf_mb);

	assert(alloc_len >= index + 1);

	/* Null-terminate converted. */
	converted[index] = '\0';

	/* Make sure converted takes up no more than len columns. */
	index = actual_x(converted, len);
	null_at(&converted, index);

	return converted;
}