/*
 * match -- report whether str matches pattern in its entirety.
 *
 * Both arguments are NUL-terminated strings.  No captures are requested
 * (the params argument of MatchText is NULL).  Only a LIKE_TRUE result
 * counts as a match; any other MatchText result yields false.
 */
bool
match(const char *str, const char *pattern)
{
	return MatchText(str, strlen(str), pattern, strlen(pattern), NULL) == LIKE_TRUE;
}
/*
 * capture -- match str against pattern and hand back the captured values.
 *
 * Returns the list filled in by MatchText when the match succeeds AND the
 * number of captured entries equals nparams.  In every other case the
 * partial list is released with list_free_deep() and NIL is returned.
 */
List *
capture(const char *str, const char *pattern, int nparams)
{
	List   *captured = NIL;
	int		status = MatchText(str, strlen(str), pattern, strlen(pattern), &captured);

	/* Reject both a failed match and a match with the wrong capture count. */
	if (status != LIKE_TRUE || list_length(captured) != nparams)
	{
		list_free_deep(captured);
		return NIL;
	}

	return captured;
}
Exemple #3
0
// Evaluates this term against a field value that is available as a string
// (szStrValue, may be NULL) and/or as an integer (iIntValue).
//
// Commands ordered before fcEqual are handled as string matches; when no
// string was supplied, the integer is formatted as decimal text first.
// Commands from fcEqual onwards are numeric comparisons; when a string was
// supplied, it is parsed with atof() and replaces both the floating-point
// and the (truncated) integer value.
//
// Returns the comparison result, or false for a command not handled below.
bool FeedFilter::Term::MatchValue(const char* szStrValue, long long iIntValue)
{
	char szIntBuf[100];
	double fFloatValue = (double)iIntValue;

	if (m_eCommand >= fcEqual)
	{
		// Numeric comparison: a supplied string overrides the numeric input.
		if (szStrValue)
		{
			fFloatValue = atof(szStrValue);
			iIntValue = (long long)fFloatValue;
		}
	}
	else if (!szStrValue)
	{
		// String comparison without a string: use the integer's decimal form.
		snprintf(szIntBuf, sizeof(szIntBuf), "%lld", iIntValue);
		szIntBuf[sizeof(szIntBuf) - 1] = '\0';
		szStrValue = szIntBuf;
	}

	switch (m_eCommand)
	{
		case fcText:
			return MatchText(szStrValue);

		case fcRegex:
			return MatchRegex(szStrValue);

		case fcEqual:
			if (m_bFloat)
			{
				return fFloatValue == m_fFloatParam;
			}
			return iIntValue == m_iIntParam;

		case fcLess:
			if (m_bFloat)
			{
				return fFloatValue < m_fFloatParam;
			}
			return iIntValue < m_iIntParam;

		case fcLessEqual:
			if (m_bFloat)
			{
				return fFloatValue <= m_fFloatParam;
			}
			return iIntValue <= m_iIntParam;

		case fcGreater:
			if (m_bFloat)
			{
				return fFloatValue > m_fFloatParam;
			}
			return iIntValue > m_iIntParam;

		case fcGreaterEqual:
			if (m_bFloat)
			{
				return fFloatValue >= m_fFloatParam;
			}
			return iIntValue >= m_iIntParam;

		default:
			return false;
	}
}
/*
 * MatchText -- match text t (tlen bytes) against LIKE pattern p (plen bytes).
 *
 * Pattern syntax as handled below:
 *	 %	 matches any sequence of zero or more characters
 *	 _	 matches exactly one character
 *	 \	 makes the following pattern byte match literally
 *
 * Returns:
 *	 LIKE_TRUE	 the pattern matches the text
 *	 LIKE_FALSE	 this attempt failed (mismatch, or text left over after the
 *				 pattern ran out)
 *	 LIKE_ABORT	 the text ran out with pattern still unmatched; an
 *				 enclosing % search loop returns this straight to its
 *				 caller instead of trying later start positions
 *
 * A pattern whose escape character has nothing after it raises
 * ERRCODE_INVALID_ESCAPE_SEQUENCE through ereport(ERROR) when that escape
 * is reached.
 *
 * locale and locale_is_c are not referenced by name in this body; they are
 * only forwarded to the recursive call.  NOTE(review): presumably the
 * GETCHAR macro consumes them -- confirm against the macro definition.
 */
static int
MatchText(char *t, int tlen, char *p, int plen,
		  pg_locale_t locale, bool locale_is_c)
{
	/* Fast path for match-everything pattern */
	if (plen == 1 && *p == '%')
		return LIKE_TRUE;

	/*
	 * In this loop, we advance by char when matching wildcards (and thus on
	 * recursive entry to this function we are properly char-synced). On other
	 * occasions it is safe to advance by byte, as the text and pattern will
	 * be in lockstep. This allows us to perform all comparisons between the
	 * text and pattern on a byte by byte basis, even for multi-byte
	 * encodings.
	 */
	while (tlen > 0 && plen > 0)
	{
		if (*p == '\\')
		{
			/* Next pattern byte must match literally, whatever it is */
			NextByte(p, plen);
			/* ... and there had better be one, per SQL standard */
			if (plen <= 0)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
				 errmsg("LIKE pattern must not end with escape character")));
			if (GETCHAR(*p) != GETCHAR(*t))
				return LIKE_FALSE;
		}
		else if (*p == '%')
		{
			char		firstpat;

			/*
			 * % processing is essentially a search for a text position at
			 * which the remainder of the text matches the remainder of the
			 * pattern, using a recursive call to check each potential match.
			 *
			 * If there are wildcards immediately following the %, we can skip
			 * over them first, using the idea that any sequence of N _'s and
			 * one or more %'s is equivalent to N _'s and one % (ie, it will
			 * match any sequence of at least N text characters).  In this way
			 * we will always run the recursive search loop using a pattern
			 * fragment that begins with a literal character-to-match, thereby
			 * not recursing more than we have to.
			 */
			NextByte(p, plen);

			while (plen > 0)
			{
				if (*p == '%')
					NextByte(p, plen);
				else if (*p == '_')
				{
					/* If not enough text left to match the pattern, ABORT */
					if (tlen <= 0)
						return LIKE_ABORT;
					NextChar(t, tlen);
					NextByte(p, plen);
				}
				else
					break;		/* Reached a non-wildcard pattern char */
			}

			/*
			 * If we're at end of pattern, match: we have a trailing % which
			 * matches any remaining text string.
			 */
			if (plen <= 0)
				return LIKE_TRUE;

			/*
			 * Otherwise, scan for a text position at which we can match the
			 * rest of the pattern.  The first remaining pattern char is known
			 * to be a regular or escaped literal character, so we can compare
			 * the first pattern byte to each text byte to avoid recursing
			 * more than we have to.  This fact also guarantees that we don't
			 * have to consider a match to the zero-length substring at the
			 * end of the text.
			 */
			if (*p == '\\')
			{
				/* An escape needs a following byte; p[1] is read just below */
				if (plen < 2)
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
							 errmsg("LIKE pattern must not end with escape character")));
				firstpat = GETCHAR(p[1]);
			}
			else
				firstpat = GETCHAR(*p);

			while (tlen > 0)
			{
				if (GETCHAR(*t) == firstpat)
				{
					int			matched = MatchText(t, tlen, p, plen,
													locale, locale_is_c);

					if (matched != LIKE_FALSE)
						return matched; /* TRUE or ABORT */
				}

				NextChar(t, tlen);
			}

			/*
			 * End of text with no match, so no point in trying later places
			 * to start matching this pattern.
			 */
			return LIKE_ABORT;
		}
		else if (*p == '_')
		{
			/* _ matches any single character, and we know there is one */
			NextChar(t, tlen);
			NextByte(p, plen);
			continue;
		}
		else if (GETCHAR(*p) != GETCHAR(*t))
		{
			/* non-wildcard pattern char fails to match text char */
			return LIKE_FALSE;
		}

		/*
		 * Pattern and text match, so advance.
		 *
		 * It is safe to use NextByte instead of NextChar here, even for
		 * multi-byte character sets, because we are not following immediately
		 * after a wildcard character. If we are in the middle of a multibyte
		 * character, we must already have matched at least one byte of the
		 * character from both text and pattern; so we cannot get out-of-sync
		 * on character boundaries.  And we know that no backend-legal
		 * encoding allows ASCII characters such as '%' to appear as non-first
		 * bytes of characters, so we won't mistakenly detect a new wildcard.
		 */
		NextByte(t, tlen);
		NextByte(p, plen);
	}

	if (tlen > 0)
		return LIKE_FALSE;		/* end of pattern, but not of text */

	/*
	 * End of text, but perhaps not of pattern.  Match iff the remaining
	 * pattern can match a zero-length string, ie, it's zero or more %'s.
	 */
	while (plen > 0 && *p == '%')
		NextByte(p, plen);
	if (plen <= 0)
		return LIKE_TRUE;

	/*
	 * End of text with no match, so no point in trying later places to start
	 * matching this pattern.
	 */
	return LIKE_ABORT;
}	/* MatchText() */
/*
 * MatchText -- match text t (tlen bytes) against LIKE pattern p (plen bytes).
 *
 * Pattern syntax as handled below:
 *	 %	 matches any sequence of zero or more characters
 *	 _	 matches exactly one character
 *	 \	 makes the following pattern byte match literally
 *
 * Returns:
 *	 LIKE_TRUE	 the pattern matches the text
 *	 LIKE_FALSE	 this attempt failed (mismatch, text left over after the
 *				 pattern ran out, or a pattern ending in a bare escape)
 *	 LIKE_ABORT	 the text ran out with pattern still unmatched; an
 *				 enclosing % search loop returns this straight to its
 *				 caller instead of trying later start positions
 *
 * Bytes are compared through TCHAR(), so whatever folding that macro applies
 * is applied to both the text and the pattern side.
 */
static int
MatchText(char *t, int tlen, char *p, int plen)
{
	/* Fast path for match-everything pattern */
	if ((plen == 1) && (*p == '%'))
		return LIKE_TRUE;

	/*
	 * In this loop, we advance by char when matching wildcards (and thus on
	 * recursive entry to this function we are properly char-synced). On other
	 * occasions it is safe to advance by byte, as the text and pattern will
	 * be in lockstep. This allows us to perform all comparisons between the
	 * text and pattern on a byte by byte basis, even for multi-byte
	 * encodings.
	 */

	while ((tlen > 0) && (plen > 0))
	{
		if (*p == '\\')
		{
			/* Next byte must match literally, whatever it is */
			NextByte(p, plen);
			if ((plen <= 0) || TCHAR(*p) != TCHAR(*t))
				return LIKE_FALSE;
		}
		else if (*p == '%')
		{
			char		firstpat;

			/*
			 * % processing is essentially a search for a text position at
			 * which the remainder of the text matches the remainder of the
			 * pattern, using a recursive call to check each potential match.
			 *
			 * Wildcards immediately following the % are skipped first: any
			 * sequence of N _'s and one or more %'s is equivalent to N _'s
			 * and one % (it matches any sequence of at least N text
			 * characters).  Each _ therefore just consumes one text
			 * character here.
			 *
			 * Handling the whole run in one place is what makes patterns
			 * such as '%_' and '%_%' come out right: a single-_ special case
			 * would wrongly reject 'ab' LIKE '%_' (text left over once the
			 * pattern is used up) and 'a' LIKE '%_%' (a trailing % left over
			 * once the text is used up).
			 */
			NextByte(p, plen);

			while (plen > 0)
			{
				if (*p == '%')
					NextByte(p, plen);
				else if (*p == '_')
				{
					/* If not enough text left to match the pattern, ABORT */
					if (tlen <= 0)
						return LIKE_ABORT;
					NextChar(t, tlen);
					NextByte(p, plen);
				}
				else
					break;		/* Reached a non-wildcard pattern char */
			}

			/*
			 * If we're at end of pattern, match: the % absorbs whatever
			 * text remains (possibly none).
			 */
			if (plen <= 0)
				return LIKE_TRUE;

			/*
			 * Otherwise, scan for a text position at which we can match the
			 * rest of the pattern.  The first remaining pattern char is
			 * known to be a regular or escaped literal, so comparing its
			 * first byte with each text byte avoids most of the recursion,
			 * and a zero-length match at end of text need not be considered.
			 */
			if (*p == '\\')
			{
				/* A bare escape at end of pattern can never match */
				if (plen < 2)
					return LIKE_FALSE;
				firstpat = TCHAR(p[1]);
			}
			else
				firstpat = TCHAR(*p);

			while (tlen > 0)
			{
				if (TCHAR(*t) == firstpat)
				{
					int			matched = MatchText(t, tlen, p, plen);

					if (matched != LIKE_FALSE)
						return matched;		/* TRUE or ABORT */
				}

				NextChar(t, tlen);
			}

			/*
			 * End of text with no match, so no point in trying later places
			 * to start matching this pattern.
			 */
			return LIKE_ABORT;
		}
		else if (*p == '_')
		{
			/* _ matches any single character, and we know there is one */
			NextChar(t, tlen);
			NextByte(p, plen);
			continue;
		}
		else if (TCHAR(*t) != TCHAR(*p))
		{
			/*
			 * Not the single-character wildcard and no explicit match? Then
			 * time to quit...
			 */
			return LIKE_FALSE;
		}

		/*
		 * It is safe to use NextByte instead of NextChar here, even for
		 * multi-byte character sets, because we are not following immediately
		 * after a wildcard character. If we are in the middle of a multibyte
		 * character, we must already have matched at least one byte of the
		 * character from both text and pattern; so we cannot get out-of-sync
		 * on character boundaries.  And we know that no backend-legal
		 * encoding allows ASCII characters such as '%' to appear as non-first
		 * bytes of characters, so we won't mistakenly detect a new wildcard.
		 */
		NextByte(t, tlen);
		NextByte(p, plen);
	}

	if (tlen > 0)
		return LIKE_FALSE;		/* end of pattern, but not of text */

	/*
	 * End of text, but perhaps not of pattern.  Match iff the remaining
	 * pattern can match a zero-length string, ie, it's zero or more %'s.
	 */
	while ((plen > 0) && (*p == '%'))
		NextByte(p, plen);

	if (plen <= 0)
		return LIKE_TRUE;

	/*
	 * End of text with no match, so no point in trying later places to start
	 * matching this pattern.
	 */
	return LIKE_ABORT;
}	/* MatchText() */
Exemple #6
0
/*
 * MatchText -- match text t (tlen bytes) against LIKE pattern p (plen bytes),
 * stepping through both with NextChar and comparing with CHAREQ.
 *
 * '%' matches any run of characters, '_' matches one character, and '\'
 * makes the next pattern character literal.
 *
 * Returns LIKE_TRUE on a match, LIKE_FALSE when this attempt fails, and
 * LIKE_ABORT when the text is used up while pattern remains (which lets an
 * enclosing % search stop trying later start positions).
 */
static int
MatchText(unsigned char *t, int tlen, unsigned char *p, int plen)
{
	/* A pattern that is exactly "%" accepts any text at all */
	if (plen == 1 && *p == '%')
		return LIKE_TRUE;

	while (tlen > 0 && plen > 0)
	{
		if (*p == '%')
		{
			/*
			 * Collapse the whole run of %'s (%% is the same as % according
			 * to the SQL standard).  We know the first one is there.
			 */
			do
			{
				NextChar(p, plen);
			} while (plen > 0 && *p == '%');

			/* Nothing after the %'s: the rest of the text is accepted */
			if (plen <= 0)
				return LIKE_TRUE;

			/*
			 * Try every remaining text position as the place where the rest
			 * of the pattern starts.  Recursion is skipped when the leading
			 * pattern char plainly cannot match the current text char.
			 */
			while (tlen > 0)
			{
				if (CHAREQ(t, p) || *p == '\\' || *p == '_')
				{
					int			result = MatchText(t, tlen, p, plen);

					/* TRUE and ABORT both end the search right here */
					if (result != LIKE_FALSE)
						return result;
				}

				NextChar(t, tlen);
			}

			/* Text exhausted; later start positions cannot do better */
			return LIKE_ABORT;
		}

		if (*p == '\\')
		{
			/* Step over the escape; the char after it must match literally */
			NextChar(p, plen);
			if (plen <= 0)
				return LIKE_FALSE;
			if (!CHAREQ(t, p))
				return LIKE_FALSE;
		}
		else if (*p != '_')
		{
			/* Ordinary pattern char: must equal the text char */
			if (!CHAREQ(t, p))
				return LIKE_FALSE;
		}

		/* Matched one char (literally or via _): advance both sides */
		NextChar(t, tlen);
		NextChar(p, plen);
	}

	/* Pattern used up while text remains: no match */
	if (tlen > 0)
		return LIKE_FALSE;

	/* Text used up: any trailing %'s can still match the empty string */
	while (plen > 0 && *p == '%')
		NextChar(p, plen);

	/* Leftover pattern means failure that no later start could repair */
	return (plen <= 0) ? LIKE_TRUE : LIKE_ABORT;
}	/* MatchText() */
/*
 * MatchText -- match text against a printf-style template, optionally
 * collecting the text matched by each conversion.
 *
 * t, tlen:	 text and its remaining length.
 * p, plen:	 pattern and its remaining length.  A '%' followed by everything
 *			 up to and including the first character found in
 *			 "diouxXeEfFgGaAcspm" acts as a wildcard; "%%" stands for a
 *			 literal '%'; every other byte must match the text exactly.
 *			 strpbrk() is run on p, so the pattern is assumed to be
 *			 NUL-terminated -- TODO confirm with callers.
 * params:	 may be NULL.  Otherwise, on a successful match a copy
 *			 (strdup_with_len) of the text covered by each wildcard is added
 *			 with lcons() as the recursion unwinds; assuming lcons()
 *			 prepends, the list ends up in pattern order.
 *
 * Returns LIKE_TRUE on a match, LIKE_FALSE on a mismatch or a malformed
 * conversion (no type character found), and LIKE_ABORT when the text ran
 * out with pattern still unmatched.
 *
 * NOTE(review): conversions skipped by the final loop (text already used up)
 * add nothing to *params, so the number of captured entries can be smaller
 * than the number of conversions in the pattern.
 */
static int
MatchText(const char *t, size_t tlen, const char *p, size_t plen, List **params)
{
	while (tlen > 0 && plen > 0)
	{
		/* A '%' that is the very last pattern byte is compared literally */
		if (plen < 2 || *p != '%')
		{
			/* non-wildcard pattern char fails to match text char */
			if (*p != *t)
				return LIKE_FALSE;
		}
		else if (p[1] == '%')
		{
			/* %% is % */
			NextByte(p, plen);
			if (*p != *t)
				return LIKE_FALSE;
		}
		else
		{
			const char *begin = p;	/* start of the conversion spec */
			const char *w = t;		/* where this wildcard's text begins */
			char		firstpat;

			/* Skip until the type specifier */
			p = strpbrk(begin + 1, "diouxXeEfFgGaAcspm");
			if (p == NULL)
				return LIKE_FALSE;	/* bad format */
			p++;
			plen -= p - begin;
			/* plen is unsigned, so this test is effectively plen == 0 */
			if (plen <= 0)
			{
				/* Trailing wildcard: it captures all remaining text */
				if (params)
					*params = lcons(strdup_with_len(t, tlen), *params);
				return LIKE_TRUE;	/* matches everything. */
			}

			/*
			 * Otherwise, scan for a text position at which we can match the
			 * rest of the pattern.
			 */
			firstpat = *p;

			while (tlen > 0)
			{
				/*
				 * Optimization to prevent most recursion: don't recurse
				 * unless first pattern byte matches first text byte.
				 */
				if (*t == firstpat)
				{
					int		matched = MatchText(t, tlen, p, plen, params);

					/* The wildcard covered the text from w up to t */
					if (matched == LIKE_TRUE && params)
						*params = lcons(strdup_with_len(w, t - w), *params);
					if (matched != LIKE_FALSE)
						return matched;		/* TRUE or ABORT */
				}

				NextChar(t, tlen);
			}

			/*
			 * End of text with no match, so no point in trying later places
			 * to start matching this pattern.
			 */
			return LIKE_ABORT;
		}

		NextByte(t, tlen);
		NextByte(p, plen);
	}
	if (tlen > 0)
		return LIKE_FALSE;		/* end of pattern, but not of text */

	/* End of text string.	Do we have matching pattern remaining? */
	/* Remaining conversions may match the empty string; step over them */
	while (plen > 0 && *p == '%')
	{
		const char *begin = p;
		p = strpbrk(begin + 1, "diouxXeEfFgGaAcspm");
		if (p == NULL)
			return LIKE_FALSE;	/* bad format */
		p++;
		plen -= p - begin;
	}
	/* plen is unsigned, so this test is effectively plen == 0 */
	if (plen <= 0)
		return LIKE_TRUE;

	/*
	 * End of text with no match, so no point in trying later places to start
	 * matching this pattern.
	 */
	return LIKE_ABORT;
}