示例#1
0
/* Test, ignoring case, whether the character string STRING starts with the
   character string PREFIX.  Only the first mbslen (PREFIX) characters of
   STRING take part in the comparison.
   Return a pointer to the byte of STRING that follows the matched prefix,
   or NULL if STRING does not start with PREFIX.
   Note: In multibyte locales the result may be non-NULL even though STRING
   is of smaller length than PREFIX!  */
char *
mbspcasecmp (const char *string, const char *prefix)
{
  /* Same result as mbsncasecmp (string, prefix, mbslen (prefix)), but with
     a few shortcuts.  */

  /* A string is trivially prefixed by itself; nothing remains after it.  */
  if (string == prefix)
    return (char *) (string + strlen (string));

  /* Walk both strings in lockstep and stop at the first difference, so that
     neither string is scanned any further than necessary.  */
  if (MB_CUR_MAX > 1)
    {
      mbui_iterator_t it_string;
      mbui_iterator_t it_prefix;

      mbui_init (it_string, string);
      mbui_init (it_prefix, prefix);

      for (;;)
        {
          int diff;

          if (!mbui_avail (it_string) || !mbui_avail (it_prefix))
            break;

          diff = mb_casecmp (mbui_cur (it_string), mbui_cur (it_prefix));
          if (diff != 0)
            return NULL;

          mbui_advance (it_string);
          mbui_advance (it_prefix);
        }

      /* If PREFIX still has characters left, STRING ended first.  */
      if (mbui_avail (it_prefix))
        return NULL;

      /* PREFIX is exhausted: it equals STRING or ends before it.  */
      return (char *) mbui_cur_ptr (it_string);
    }

  /* Unibyte locale: compare byte by byte.  */
  {
    const unsigned char *s = (const unsigned char *) string;
    const unsigned char *p = (const unsigned char *) prefix;

    for (;;)
      {
        unsigned char cs = TOLOWER (*s);
        unsigned char cp = TOLOWER (*p);

        if (cp == '\0')
          /* PREFIX equals STRING or is terminated before STRING.  */
          return (char *) s;
        if (cs != cp)
          /* Mismatch, or STRING terminated before PREFIX.  */
          return NULL;

        s++;
        p++;
      }
  }
}
示例#2
0
文件: mbschr.c 项目: djmitche/gnulib
/* Search the character string STRING for the first character that is encoded
   as the single byte C.  Return a pointer to it, or NULL if STRING contains
   no such character.  */
char *
mbschr (const char *string, int c)
{
  /* In a unibyte locale a plain byte search is exact.  The same holds for
     C < 0x30 in any locale: ASCII characters < 0x30 don't occur as part of
     multibyte characters longer than 1 byte, so the faster byte search can
     be used for them as well.  */
  if (MB_CUR_MAX <= 1 || (unsigned char) c < 0x30)
    return strchr (string, c);

  {
    unsigned char wanted = (unsigned char) c;
    mbui_iterator_t iter;

    mbui_init (iter, string);
    while (mbui_avail (iter))
      {
        /* Only a character that is exactly one byte long can match.  */
        if (mb_len (mbui_cur (iter)) == 1
            && (unsigned char) * mbui_cur_ptr (iter) == wanted)
          return (char *) mbui_cur_ptr (iter);

        mbui_advance (iter);
      }

    /* Reached the end of STRING without a match.  */
    return NULL;
  }
}
示例#3
0
/* Return the number of bytes at the start of the character string STRING
   that precede the first character which also occurs in the character
   string ACCEPT.  If no character of STRING occurs in ACCEPT, the result is
   the length of STRING in bytes.  */
size_t
mbscspn (const char *string, const char *accept)
{
  /* Shortcut: an empty ACCEPT can never match.  */
  if (accept[0] == '\0')
    return strlen (string);

  /* Shortcut: ACCEPT is a single byte, so this is a single-character
     search.  */
  if (accept[1] == '\0')
    {
      const char *hit = mbschr (string, accept[0]);

      if (hit == NULL)
        return strlen (string);
      return hit - string;
    }

  /* General case.  */
  if (MB_CUR_MAX <= 1)
    return strcspn (string, accept);

  {
    mbui_iterator_t iter;

    mbui_init (iter, string);
    while (mbui_avail (iter))
      {
        int in_accept = 0;

        if (mb_len (mbui_cur (iter)) == 1)
          /* Single-byte character: a byte-wise lookup suffices.  */
          in_accept = (mbschr (accept, * mbui_cur_ptr (iter)) != NULL);
        else
          {
            /* Multibyte character: compare it with each character of
               ACCEPT.  */
            mbui_iterator_t aiter;

            mbui_init (aiter, accept);
            while (mbui_avail (aiter))
              {
                if (mb_equal (mbui_cur (aiter), mbui_cur (iter)))
                  {
                    in_accept = 1;
                    break;
                  }
                mbui_advance (aiter);
              }
          }

        if (in_accept)
          break;
        mbui_advance (iter);
      }

    /* ITER is positioned at the first match, or at the end of STRING.  */
    return mbui_cur_ptr (iter) - string;
  }
}
示例#4
0
/* Search the character string STRING for the first character that also
   occurs in the character string ACCEPT.  Return a pointer to that
   character, or NULL if STRING contains none of them.  */
char *
mbspbrk (const char *string, const char *accept)
{
  /* Shortcut: an empty ACCEPT can never match.  */
  if (accept[0] == '\0')
    return NULL;

  /* Shortcut: ACCEPT is a single byte, so this is a single-character
     search.  */
  if (accept[1] == '\0')
    return mbschr (string, accept[0]);

  /* General case.  */
  if (MB_CUR_MAX <= 1)
    return strpbrk (string, accept);

  {
    mbui_iterator_t iter;

    mbui_init (iter, string);
    while (mbui_avail (iter))
      {
        if (mb_len (mbui_cur (iter)) == 1)
          {
            /* Single-byte character: a byte-wise lookup suffices.  */
            if (mbschr (accept, * mbui_cur_ptr (iter)) != NULL)
              return (char *) mbui_cur_ptr (iter);
          }
        else
          {
            /* Multibyte character: compare it with each character of
               ACCEPT.  */
            mbui_iterator_t aiter;

            mbui_init (aiter, accept);
            while (mbui_avail (aiter))
              {
                if (mb_equal (mbui_cur (aiter), mbui_cur (iter)))
                  return (char *) mbui_cur_ptr (iter);
                mbui_advance (aiter);
              }
          }

        mbui_advance (iter);
      }

    /* No character of STRING occurs in ACCEPT.  */
    return NULL;
  }
}
示例#5
0
文件: mbslen.c 项目: komh/gnulib-os2
/* Count the multibyte characters in the character string STRING and return
   that count.  */
size_t
mbslen (const char *string)
{
  /* In a unibyte locale the byte length is returned directly.  */
  if (MB_CUR_MAX <= 1)
    return strlen (string);

  {
    size_t n_chars = 0;
    mbui_iterator_t iter;

    /* One iteration per character that the iterator delivers.  */
    mbui_init (iter, string);
    while (mbui_avail (iter))
      {
        n_chars++;
        mbui_advance (iter);
      }

    return n_chars;
  }
}
示例#6
0
/* Hash function for case-insensitive string keys.
   DATA points to a NUL-terminated character string.  Each character is
   folded to lower case before it is mixed into the hash, so strings that
   differ only in case get the same value.  The result lies in the range
   0 .. N_BUCKETS - 1.  */
static size_t
string_hasher_ci (void const *data, size_t n_buckets)
{
  char const *str = data;
  size_t hash = 0;
  mbui_iterator_t iter;

  mbui_init (iter, str);
  while (mbui_avail (iter))
    {
      mbchar_t ch = mbui_cur (iter);
      wchar_t wc;

      if (!ch.wc_valid)
        /* No valid wide character: use the byte at the start of the
           character as is.  */
        wc = *ch.ptr;
      else
        wc = towlower (ch.wc);

      /* Reduce modulo N_BUCKETS after every step.  */
      hash = (hash * 31 + wc) % n_buckets;

      mbui_advance (iter);
    }

  return hash;
}
示例#7
0
/* Split off the next token of the character string *STRINGP, where tokens
   are separated by any of the characters in the character string DELIM.
   In a multibyte locale:
     - if *STRINGP is NULL, return NULL;
     - if no delimiter occurs, set *STRINGP to NULL and return the old
       *STRINGP;
     - otherwise overwrite the first byte of the delimiter with NUL, set
       *STRINGP to the byte after the delimiter character, and return the
       old *STRINGP.
   In a unibyte locale the work is delegated to strsep.  */
char *
mbssep (char **stringp, const char *delim)
{
    char *token;
    char *delim_pos;

    if (MB_CUR_MAX <= 1)
        return strsep (stringp, delim);

    token = *stringp;
    if (token == NULL)
        return NULL;

    /* The cases of 0 or 1 delimiters need no special treatment here,
       since mbspbrk already optimizes them.  */
    delim_pos = mbspbrk (token, delim);

    if (delim_pos != NULL)
    {
        mbui_iterator_t iter;

        /* Step over the whole delimiter character, which may be longer
           than one byte, before terminating the token.  */
        mbui_init (iter, delim_pos);
        if (!mbui_avail (iter))
            abort ();
        mbui_advance (iter);
        *delim_pos = '\0';
        *stringp = (char *) mbui_cur_ptr (iter);
    }
    else
        /* This was the last token.  */
        *stringp = NULL;

    return token;
}
示例#8
0
/* Knuth-Morris-Pratt algorithm.
   See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
   Search the character string HAYSTACK for the first occurrence of the
   character string NEEDLE, comparing multibyte characters.
   Return a boolean indicating success:
   Return true and set *RESULTP if the search was completed.  *RESULTP then
   points to the start of the match in HAYSTACK, or is NULL if NEEDLE does
   not occur.
   Return false if it was aborted because not enough memory was available.
   In that case *RESULTP is not written.
   NOTE(review): table[1] is written unconditionally below, which presumes
   mbslen (NEEDLE) >= 2 -- confirm that every caller guarantees this.  */
static bool
knuth_morris_pratt_multibyte (const char *haystack, const char *needle,
                              const char **resultp)
{
  /* Number of characters (not bytes) in NEEDLE.  */
  size_t m = mbslen (needle);
  mbchar_t *needle_mbchars;
  size_t *table;

  /* Allocate room for needle_mbchars and the table.
     Both arrays have m elements and share one allocation: needle_mbchars
     comes first, the table follows directly after it.  */
  char *memory = (char *) nmalloca (m, sizeof (mbchar_t) + sizeof (size_t));
  if (memory == NULL)
    return false;
  needle_mbchars = (mbchar_t *) memory;
  table = (size_t *) (memory + m * sizeof (mbchar_t));

  /* Fill needle_mbchars.  */
  {
    mbui_iterator_t iter;
    size_t j;

    j = 0;
    for (mbui_init (iter, needle); mbui_avail (iter); mbui_advance (iter), j++)
      mb_copy (&needle_mbchars[j], &mbui_cur (iter));
  }

  /* Fill the table.
     For 0 < i < m:
       0 < table[i] <= i is defined such that
       forall 0 < x < table[i]: needle[x..i-1] != needle[0..i-1-x],
       and table[i] is as large as possible with this property.
     This implies:
     1) For 0 < i < m:
          If table[i] < i,
          needle[table[i]..i-1] = needle[0..i-1-table[i]].
     2) For 0 < i < m:
          rhaystack[0..i-1] == needle[0..i-1]
          and exists h, i <= h < m: rhaystack[h] != needle[h]
          implies
          forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1].
     table[0] remains uninitialized.  */
  {
    size_t i, j;

    /* i = 1: Nothing to verify for x = 0.  */
    table[1] = 1;
    j = 0;

    for (i = 2; i < m; i++)
      {
        /* Here: j = i-1 - table[i-1].
           The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold
           for x < table[i-1], by induction.
           Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
        mbchar_t *b = &needle_mbchars[i - 1];

        for (;;)
          {
            /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x]
               is known to hold for x < i-1-j.
               Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
            if (mb_equal (*b, needle_mbchars[j]))
              {
                /* Set table[i] := i-1-j.  */
                table[i] = i - ++j;
                break;
              }
            /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
               for x = i-1-j, because
                 needle[i-1] != needle[j] = needle[i-1-x].  */
            if (j == 0)
              {
                /* The inequality holds for all possible x.  */
                table[i] = i;
                break;
              }
            /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
               for i-1-j < x < i-1-j+table[j], because for these x:
                 needle[x..i-2]
                 = needle[x-(i-1-j)..j-1]
                 != needle[0..j-1-(x-(i-1-j))]  (by definition of table[j])
                    = needle[0..i-2-x],
               hence needle[x..i-1] != needle[0..i-1-x].
               Furthermore
                 needle[i-1-j+table[j]..i-2]
                 = needle[table[j]..j-1]
                 = needle[0..j-1-table[j]]  (by definition of table[j]).  */
            j = j - table[j];
          }
        /* Here: j = i - table[i].  */
      }
  }

  /* Search, using the table to accelerate the processing.
     rhaystack marks the start of the current match candidate, phaystack the
     character that is compared next; j characters of NEEDLE have been
     matched so far.  */
  {
    size_t j;
    mbui_iterator_t rhaystack;
    mbui_iterator_t phaystack;

    /* Default result: no match.  */
    *resultp = NULL;
    j = 0;
    mbui_init (rhaystack, haystack);
    mbui_init (phaystack, haystack);
    /* Invariant: phaystack = rhaystack + j.  */
    while (mbui_avail (phaystack))
      if (mb_equal (needle_mbchars[j], mbui_cur (phaystack)))
        {
          j++;
          mbui_advance (phaystack);
          if (j == m)
            {
              /* The entire needle has been found.  */
              *resultp = mbui_cur_ptr (rhaystack);
              break;
            }
        }
      else if (j > 0)
        {
          /* Found a match of needle[0..j-1], mismatch at needle[j].
             Shift the candidate start by table[j] characters; phaystack
             stays where it is, so no haystack character is compared again
             from scratch.  */
          size_t count = table[j];
          j -= count;
          for (; count > 0; count--)
            {
              if (!mbui_avail (rhaystack))
                abort ();
              mbui_advance (rhaystack);
            }
        }
      else
        {
          /* Found a mismatch at needle[0] already.  */
          if (!mbui_avail (rhaystack))
            abort ();
          mbui_advance (rhaystack);
          mbui_advance (phaystack);
        }
  }

  freea (memory);
  return true;
}
示例#9
0
/* Find the first occurrence of the character string NEEDLE in the character
   string HAYSTACK.  Return NULL if NEEDLE is not found in HAYSTACK.
   Return a pointer to the start of the occurrence otherwise; an empty
   NEEDLE is found at the very start, so HAYSTACK itself is returned.  */
char *
mbsstr (const char *haystack, const char *needle)
{
  /* Be careful not to look at the entire extent of haystack or needle
     until needed.  This is useful because of these two cases:
       - haystack may be very long, and a match of needle found early,
       - needle may be very long, and not even a short initial segment of
         needle may be found in haystack.  */
  if (MB_CUR_MAX > 1)
    {
      /* Stays at the first character of needle; used for the first-character
         comparison in every round.  */
      mbui_iterator_t iter_needle;

      mbui_init (iter_needle, needle);
      if (mbui_avail (iter_needle))
        {
          /* Minimizing the worst-case complexity:
             Let n = mbslen(haystack), m = mbslen(needle).
             The naïve algorithm is O(n*m) worst-case.
             The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
             memory allocation.
             To achieve linear complexity and yet amortize the cost of the
             memory allocation, we activate the Knuth-Morris-Pratt algorithm
             only once the naïve algorithm has already run for some time; more
             precisely, when
               - the outer loop count is >= 10,
               - the average number of comparisons per outer loop is >= 5,
               - the total number of comparisons is >= m.
             But we try it only once.  If the memory allocation attempt failed,
             we don't retry it.  */
          bool try_kmp = true;
          size_t outer_loop_count = 0;
          size_t comparison_count = 0;
          size_t last_ccount = 0;                  /* last comparison count */
          mbui_iterator_t iter_needle_last_ccount; /* = needle + last_ccount */

          mbui_iterator_t iter_haystack;

          mbui_init (iter_needle_last_ccount, needle);
          mbui_init (iter_haystack, haystack);
          for (;; mbui_advance (iter_haystack))
            {
              if (!mbui_avail (iter_haystack))
                /* No match.  */
                return NULL;

              /* See whether it's advisable to use an asymptotically faster
                 algorithm.  */
              if (try_kmp
                  && outer_loop_count >= 10
                  && comparison_count >= 5 * outer_loop_count)
                {
                  /* See if needle + comparison_count now reaches the end of
                     needle.  Only the comparisons made since the last check
                     are stepped over, so needle is never rescanned from its
                     start.  */
                  size_t count = comparison_count - last_ccount;
                  for (;
                       count > 0 && mbui_avail (iter_needle_last_ccount);
                       count--)
                    mbui_advance (iter_needle_last_ccount);
                  last_ccount = comparison_count;
                  if (!mbui_avail (iter_needle_last_ccount))
                    {
                      /* Try the Knuth-Morris-Pratt algorithm.
                         haystack is unchanged in this branch, so the search
                         covers the whole string again.  */
                      const char *result;
                      bool success =
                        knuth_morris_pratt_multibyte (haystack, needle,
                                                      &result);
                      if (success)
                        return (char *) result;
                      /* Allocation failed: continue with the naïve algorithm
                         and never try again.  */
                      try_kmp = false;
                    }
                }

              outer_loop_count++;
              comparison_count++;
              if (mb_equal (mbui_cur (iter_haystack), mbui_cur (iter_needle)))
                /* The first character matches.  */
                {
                  mbui_iterator_t rhaystack;
                  mbui_iterator_t rneedle;

                  /* Continue from a copy of the haystack iterator, so that
                     iter_haystack keeps pointing at the candidate start.  */
                  memcpy (&rhaystack, &iter_haystack, sizeof (mbui_iterator_t));
                  mbui_advance (rhaystack);

                  /* Start at the second character of needle.  */
                  mbui_init (rneedle, needle);
                  if (!mbui_avail (rneedle))
                    abort ();
                  mbui_advance (rneedle);

                  for (;; mbui_advance (rhaystack), mbui_advance (rneedle))
                    {
                      if (!mbui_avail (rneedle))
                        /* Found a match.  */
                        return (char *) mbui_cur_ptr (iter_haystack);
                      if (!mbui_avail (rhaystack))
                        /* No match.  */
                        return NULL;
                      comparison_count++;
                      if (!mb_equal (mbui_cur (rhaystack), mbui_cur (rneedle)))
                        /* Nothing in this round.  */
                        break;
                    }
                }
            }
        }
      else
        /* Empty needle.  */
        return (char *) haystack;
    }
  else
    {
      if (*needle != '\0')
        {
          /* Minimizing the worst-case complexity:
             Let n = strlen(haystack), m = strlen(needle).
             The naïve algorithm is O(n*m) worst-case.
             The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
             memory allocation.
             To achieve linear complexity and yet amortize the cost of the
             memory allocation, we activate the Knuth-Morris-Pratt algorithm
             only once the naïve algorithm has already run for some time; more
             precisely, when
               - the outer loop count is >= 10,
               - the average number of comparisons per outer loop is >= 5,
               - the total number of comparisons is >= m.
             But we try it only once.  If the memory allocation attempt failed,
             we don't retry it.  */
          bool try_kmp = true;
          size_t outer_loop_count = 0;
          size_t comparison_count = 0;
          size_t last_ccount = 0;                  /* last comparison count */
          const char *needle_last_ccount = needle; /* = needle + last_ccount */

          /* Speed up the following searches of needle by caching its first
             character.  From here on, needle points to the second character
             of the original needle.  */
          char b = *needle++;

          for (;; haystack++)
            {
              if (*haystack == '\0')
                /* No match.  */
                return NULL;

              /* See whether it's advisable to use an asymptotically faster
                 algorithm.  */
              if (try_kmp
                  && outer_loop_count >= 10
                  && comparison_count >= 5 * outer_loop_count)
                {
                  /* See if needle + comparison_count now reaches the end of
                     needle.  needle_last_ccount becomes NULL once the end
                     has been reached.  */
                  if (needle_last_ccount != NULL)
                    {
                      needle_last_ccount +=
                        strnlen (needle_last_ccount,
                                 comparison_count - last_ccount);
                      if (*needle_last_ccount == '\0')
                        needle_last_ccount = NULL;
                      last_ccount = comparison_count;
                    }
                  if (needle_last_ccount == NULL)
                    {
                      /* Try the Knuth-Morris-Pratt algorithm.
                         needle - 1 undoes the increment made when caching b,
                         so the whole needle is passed.  haystack has already
                         been advanced past the positions that were ruled
                         out.
                         NOTE(review): knuth_morris_pratt_unibyte is defined
                         outside this section; presumably it follows the same
                         contract as knuth_morris_pratt_multibyte -- confirm.  */
                      const char *result;
                      bool success =
                        knuth_morris_pratt_unibyte (haystack, needle - 1,
                                                    &result);
                      if (success)
                        return (char *) result;
                      /* Allocation failed: continue with the naïve algorithm
                         and never try again.  */
                      try_kmp = false;
                    }
                }

              outer_loop_count++;
              comparison_count++;
              if (*haystack == b)
                /* The first character matches.  */
                {
                  const char *rhaystack = haystack + 1;
                  const char *rneedle = needle;

                  for (;; rhaystack++, rneedle++)
                    {
                      if (*rneedle == '\0')
                        /* Found a match.  */
                        return (char *) haystack;
                      if (*rhaystack == '\0')
                        /* No match.  */
                        return NULL;
                      comparison_count++;
                      if (*rhaystack != *rneedle)
                        /* Nothing in this round.  */
                        break;
                    }
                }
            }
        }
      else
        /* Empty needle.  */
        return (char *) haystack;
    }
}
示例#10
0
/* Knuth-Morris-Pratt search of the character string NEEDLE in the character
   string HAYSTACK, comparing multibyte characters.
   Return true and set *RESULTP if the search was completed; *RESULTP then
   points to the start of the first match in HAYSTACK, or is NULL if NEEDLE
   does not occur.
   Return false, without writing *RESULTP, if the working memory could not
   be obtained, so that the caller can fall back to another algorithm.
   NOTE(review): table[1] is written unconditionally below, which presumes
   mbslen (NEEDLE) >= 2 -- confirm that every caller guarantees this.  */
static bool
knuth_morris_pratt_multibyte (const char *haystack, const char *needle,
                              const char **resultp)
{
  /* Number of characters (not bytes) in NEEDLE.  */
  size_t m = mbslen (needle);
  mbchar_t *needle_mbchars;
  size_t *table;
  char *memory;

  /* Allocate room for needle_mbchars and the table.
     First reject element counts whose total byte size would wrap around in
     size_t: malloca would otherwise return a block that is too small, and
     the fill loops below would write past its end.  Such a request is
     reported like any other allocation failure.  */
  if (m > (size_t) -1 / (sizeof (mbchar_t) + sizeof (size_t)))
    return false;
  memory = (char *) malloca (m * (sizeof (mbchar_t) + sizeof (size_t)));
  if (memory == NULL)
    return false;
  /* Both arrays have m elements and share the allocation: needle_mbchars
     comes first, the table follows directly after it.  */
  needle_mbchars = (mbchar_t *) memory;
  table = (size_t *) (memory + m * sizeof (mbchar_t));

  /* Fill needle_mbchars.  */
  {
    mbui_iterator_t iter;
    size_t j;

    j = 0;
    for (mbui_init (iter, needle); mbui_avail (iter); mbui_advance (iter), j++)
      mb_copy (&needle_mbchars[j], &mbui_cur (iter));
  }

  /* Fill the table.
     For 0 < i < m:
       0 < table[i] <= i is defined such that
       rhaystack[0..i-1] == needle[0..i-1] and rhaystack[i] != needle[i]
       implies
       forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1],
       and table[i] is as large as possible with this property.
     table[0] remains uninitialized.  */
  {
    size_t i, j;

    table[1] = 1;
    j = 0;
    for (i = 2; i < m; i++)
      {
        mbchar_t *b = &needle_mbchars[i - 1];

        for (;;)
          {
            if (mb_equal (*b, needle_mbchars[j]))
              {
                table[i] = i - ++j;
                break;
              }
            if (j == 0)
              {
                table[i] = i;
                break;
              }
            j = j - table[j];
          }
      }
  }

  /* Search, using the table to accelerate the processing.
     rhaystack marks the start of the current match candidate, phaystack the
     character that is compared next; j characters of NEEDLE have been
     matched so far.  */
  {
    size_t j;
    mbui_iterator_t rhaystack;
    mbui_iterator_t phaystack;

    /* Default result: no match.  */
    *resultp = NULL;
    j = 0;
    mbui_init (rhaystack, haystack);
    mbui_init (phaystack, haystack);
    /* Invariant: phaystack = rhaystack + j.  */
    while (mbui_avail (phaystack))
      if (mb_equal (needle_mbchars[j], mbui_cur (phaystack)))
        {
          j++;
          mbui_advance (phaystack);
          if (j == m)
            {
              /* The entire needle has been found.  */
              *resultp = mbui_cur_ptr (rhaystack);
              break;
            }
        }
      else if (j > 0)
        {
          /* Found a match of needle[0..j-1], mismatch at needle[j].
             Shift the candidate start by table[j] characters; phaystack
             stays where it is.  */
          size_t count = table[j];
          j -= count;
          for (; count > 0; count--)
            {
              if (!mbui_avail (rhaystack))
                abort ();
              mbui_advance (rhaystack);
            }
        }
      else
        {
          /* Found a mismatch at needle[0] already.  */
          if (!mbui_avail (rhaystack))
            abort ();
          mbui_advance (rhaystack);
          mbui_advance (phaystack);
        }
  }

  freea (memory);
  return true;
}
示例#11
0
/* Case-insensitive comparison of the strings S1 and S2.
   Return a negative value, zero, or a positive value if S1 is
   lexicographically less than, equal to, or greater than S2.
   Note: This function may, in multibyte locales, return 0 for strings of
   different lengths!  */
int
strcasecmp (const char *s1, const char *s2)
{
  /* Identical pointers need no scan at all.  */
  if (s1 == s2)
    return 0;

  /* Compare front to back and stop at the first difference, because when
     two strings differ, the difference is most often already in the very
     few first characters.  */
#if HAVE_MBRTOWC
  if (MB_CUR_MAX > 1)
    {
      mbui_iterator_t it1;
      mbui_iterator_t it2;

      mbui_init (it1, s1);
      mbui_init (it2, s2);

      for (;;)
        {
          int diff;

          if (!mbui_avail (it1) || !mbui_avail (it2))
            break;

          diff = mb_casecmp (mbui_cur (it1), mbui_cur (it2));
          if (diff != 0)
            return diff;

          mbui_advance (it1);
          mbui_advance (it2);
        }

      /* At least one string is exhausted; the one with characters left is
         the greater one.  */
      if (mbui_avail (it1))
        /* s2 terminated before s1.  */
        return 1;
      if (mbui_avail (it2))
        /* s1 terminated before s2.  */
        return -1;
      return 0;
    }
#endif

  {
    const unsigned char *a = (const unsigned char *) s1;
    const unsigned char *b = (const unsigned char *) s2;
    unsigned char ca, cb;

    /* Stop at the end of s1 or at the first pair of bytes that differ
       after case folding.  */
    for (;;)
      {
        ca = TOLOWER (*a);
        cb = TOLOWER (*b);

        if (ca == '\0' || ca != cb)
          break;

        a++;
        b++;
      }

    if (UCHAR_MAX <= INT_MAX)
      return ca - cb;

    /* On machines where 'char' and 'int' are types of the same size, the
       difference of two 'unsigned char' values - including the sign bit -
       doesn't fit in an 'int'.  Return just the sign instead.  */
    return (ca > cb) - (ca < cb);
  }
}
示例#12
0
/* Return true if STRING contains trim (SUB) as a whole word, i.e. starting
   and ending at word boundaries; return false otherwise.
   Unicode Standard Annex #29 is not implemented here.  Instead, trim (SUB)
   is assumed to start and end with words, and an occurrence is accepted if
   the character before it (if any) and the character after it (if any) are
   not alphanumeric.  */
static bool
mbsstr_trimmed_wordbounded (const char *string, const char *sub)
{
  char *tsub = trim (sub);
  bool found = false;

  while (*string != '\0')
    {
      const char *hit = mbsstr (string, tsub);
      bool boundary_before = true;
      bool boundary_after = true;

      if (hit == NULL)
        break;

      if (MB_CUR_MAX > 1)
        {
          mbui_iterator_t iter;

          /* Walk from STRING up to the occurrence, remembering the last
             character that precedes it.  */
          mbui_init (iter, string);
          if (mbui_cur_ptr (iter) < hit)
            {
              mbchar_t prev_char;

              do
                {
                  if (!mbui_avail (iter))
                    abort ();
                  prev_char = mbui_cur (iter);
                  mbui_advance (iter);
                }
              while (mbui_cur_ptr (iter) < hit);

              if (mb_isalnum (prev_char))
                boundary_before = false;
            }

          /* Step over the occurrence, one character of tsub at a time, to
             reach the character that follows it.  */
          mbui_init (iter, hit);
          {
            mbui_iterator_t tsub_iter;

            mbui_init (tsub_iter, tsub);
            while (mbui_avail (tsub_iter))
              {
                if (!mbui_avail (iter))
                  abort ();
                mbui_advance (iter);
                mbui_advance (tsub_iter);
              }
          }
          if (mbui_avail (iter))
            {
              mbchar_t next_char = mbui_cur (iter);

              if (mb_isalnum (next_char))
                boundary_after = false;
            }

          if (boundary_before && boundary_after)
            {
              found = true;
              break;
            }

          /* Retry, starting one character after the start of this
             occurrence.  */
          mbui_init (iter, hit);
          if (!mbui_avail (iter))
            break;
          string = hit + mb_len (mbui_cur (iter));
        }
      else
        {
          const char *after = hit + strlen (tsub);

          if (string < hit && isalnum ((unsigned char) hit[-1]))
            boundary_before = false;

          if (*after != '\0' && isalnum ((unsigned char) *after))
            boundary_after = false;

          if (boundary_before && boundary_after)
            {
              found = true;
              break;
            }

          /* Retry, starting one byte after the start of this occurrence.  */
          if (*hit == '\0')
            break;
          string = hit + 1;
        }
    }

  free (tsub);
  return found;
}
示例#13
0
文件: strstr.c 项目: Distrotech/cvs
/* Find the first occurrence of NEEDLE in HAYSTACK.
   Return a pointer to the start of the occurrence, or NULL if there is
   none.  An empty NEEDLE is found at the very start of HAYSTACK.  */
char *
strstr (const char *haystack, const char *needle)
{
  /* Neither string is scanned further than necessary:
       - haystack may be very long, and a match of needle found early,
       - needle may be very long, and not even a short initial segment of
         needle may be found in haystack.  */
#if HAVE_MBRTOWC
  if (MB_CUR_MAX > 1)
    {
      mbui_iterator_t iter_needle;
      mbui_iterator_t iter_haystack;

      mbui_init (iter_needle, needle);
      if (!mbui_avail (iter_needle))
        /* Empty needle.  */
        return (char *) haystack;

      mbui_init (iter_haystack, haystack);
      while (mbui_avail (iter_haystack))
        {
          if (mb_equal (mbui_cur (iter_haystack), mbui_cur (iter_needle)))
            /* The first character matches.  */
            {
              mbui_iterator_t rest_haystack;
              mbui_iterator_t rest_needle;

              /* Continue from a copy, so that iter_haystack keeps pointing
                 at the candidate start.  */
              memcpy (&rest_haystack, &iter_haystack,
                      sizeof (mbui_iterator_t));
              mbui_advance (rest_haystack);

              /* Start at the second character of needle.  */
              mbui_init (rest_needle, needle);
              if (!mbui_avail (rest_needle))
                abort ();
              mbui_advance (rest_needle);

              for (;;)
                {
                  if (!mbui_avail (rest_needle))
                    /* Found a match.  */
                    return (char *) mbui_cur_ptr (iter_haystack);
                  if (!mbui_avail (rest_haystack))
                    /* No match.  */
                    return NULL;
                  if (!mb_equal (mbui_cur (rest_haystack),
                                 mbui_cur (rest_needle)))
                    /* Nothing in this round.  */
                    break;
                  mbui_advance (rest_haystack);
                  mbui_advance (rest_needle);
                }
            }

          mbui_advance (iter_haystack);
        }

      /* No match.  */
      return NULL;
    }
#endif

  {
    char first;

    if (*needle == '\0')
      /* Empty needle.  */
      return (char *) haystack;

    /* Cache the first character of needle; it decides whether a position
       of haystack is worth a closer look.  */
    first = *needle;

    for (; *haystack != '\0'; haystack++)
      {
        const char *h;
        const char *n;

        if (*haystack != first)
          continue;

        /* The first character matches.  Compare the rest.  */
        h = haystack + 1;
        n = needle + 1;
        while (*n != '\0')
          {
            if (*h == '\0')
              /* The rest of haystack is shorter than needle: no match.  */
              return NULL;
            if (*h != *n)
              /* Nothing in this round.  */
              break;
            h++;
            n++;
          }

        if (*n == '\0')
          /* Found a match.  */
          return (char *) haystack;
      }

    /* No match.  */
    return NULL;
  }
}
示例#14
0
文件: mbsspn.c 项目: djmitche/gnulib
/* Return the number of bytes at the start of the character string STRING
   that consist only of characters occurring in the character string REJECT,
   i.e. the offset of the first character of STRING that is not in REJECT,
   or the length of STRING in bytes if there is no such character.
   Despite its name, REJECT is the set of characters that are skipped
   over.  */
size_t
mbsspn (const char *string, const char *reject)
{
  /* Shortcut: with an empty set, nothing can be skipped.  */
  if (reject[0] == '\0')
    return 0;

  /* Shortcut: the set is a single byte.  */
  if (reject[1] == '\0')
    {
      unsigned char only = (unsigned char) reject[0];

      if (MB_CUR_MAX > 1)
        {
          mbui_iterator_t iter;

          /* Skip characters as long as they are exactly that one byte.  */
          mbui_init (iter, string);
          while (mbui_avail (iter)
                 && mb_len (mbui_cur (iter)) == 1
                 && (unsigned char) * mbui_cur_ptr (iter) == only)
            mbui_advance (iter);

          return mbui_cur_ptr (iter) - string;
        }
      else
        {
          const char *end = string;

          while (*end != '\0' && (unsigned char) *end == only)
            end++;

          return end - string;
        }
    }

  /* General case.  */
  if (MB_CUR_MAX <= 1)
    return strspn (string, reject);

  {
    mbui_iterator_t iter;

    mbui_init (iter, string);
    while (mbui_avail (iter))
      {
        int in_set = 0;

        if (mb_len (mbui_cur (iter)) == 1)
          /* Single-byte character: a byte-wise lookup suffices.  */
          in_set = (mbschr (reject, * mbui_cur_ptr (iter)) != NULL);
        else
          {
            /* Multibyte character: compare it with each character of
               REJECT.  */
            mbui_iterator_t aiter;

            mbui_init (aiter, reject);
            while (mbui_avail (aiter))
              {
                if (mb_equal (mbui_cur (aiter), mbui_cur (iter)))
                  {
                    in_set = 1;
                    break;
                  }
                mbui_advance (aiter);
              }
          }

        if (!in_set)
          break;
        mbui_advance (iter);
      }

    /* ITER is positioned at the first character outside the set, or at the
       end of STRING.  */
    return mbui_cur_ptr (iter) - string;
  }
}