Ejemplo n.º 1
0
/*
 * p_isalpha --- report whether the character at the parser's current
 * position is alphabetic.
 *
 * Returns nonzero for an alphabetic character and zero otherwise.  For
 * byte-oriented input the current byte is handed to isalpha().  For wide
 * input the answer comes from iswalpha(), except when lc_ctype_is_c()
 * reports the C locale: there every code above 0x7f is treated as a letter
 * and only the ASCII range is passed to isalpha().
 */
static int
p_isalpha(TParser *prs)
{
	Assert(prs->state);

	if (!prs->usewide)
	{
		/* byte input: classify the byte found at posbyte */
		return isalpha(*(unsigned char *) (prs->str + prs->state->posbyte));
	}

	if (!lc_ctype_is_c())
	{
		/* wide input, non-C locale: let the C library decide */
		return iswalpha((wint_t) prs->wstr[prs->state->poschar]);
	}
	else
	{
		unsigned int wc = prs->wstr[prs->state->poschar];

		/*
		 * With a multibyte encoding and the C locale, any non-ASCII symbol
		 * is considered an alpha character.
		 */
		return (wc > 0x7f) ? 1 : isalpha(0xff & wc);
	}
}
Ejemplo n.º 2
0
/*
 * pg_set_regex_collation: choose the collation the regex character
 * classification functions will follow
 *
 * Called at the start of compiling or executing a regexp.  Regexp operations
 * do not need to be re-entrant, so the chosen strategy, locale and collation
 * are simply kept in static variables.
 *
 * Raises ERRCODE_INDETERMINATE_COLLATION when "collation" is not a valid OID.
 */
void
pg_set_regex_collation(Oid collation)
{
	/* C/POSIX collations take this path whatever the database encoding is */
	if (lc_ctype_is_c(collation))
	{
		pg_regex_strategy = PG_REGEX_LOCALE_C;
		pg_regex_locale = 0;
		pg_regex_collation = C_COLLATION_OID;
		return;
	}

	if (collation == DEFAULT_COLLATION_OID)
	{
		pg_regex_locale = 0;
	}
	else if (!OidIsValid(collation))
	{
		/*
		 * Usually this means the parser was unable to resolve a conflict
		 * between implicit collations, so word the report accordingly.
		 */
		ereport(ERROR,
				(errcode(ERRCODE_INDETERMINATE_COLLATION),
				 errmsg("could not determine which collation to use for regular expression"),
				 errhint("Use the COLLATE clause to set the collation explicitly.")));
	}
	else
	{
		/*
		 * NB: pg_newlocale_from_collation fails when HAVE_LOCALE_T is not
		 * defined, so a nonzero pg_regex_locale without HAVE_LOCALE_T never
		 * needs to be handled further down.
		 */
		pg_regex_locale = pg_newlocale_from_collation(collation);
	}

	/* pick the wide or single-byte strategy, with or without a locale */
#ifdef USE_WIDE_UPPER_LOWER
	if (GetDatabaseEncoding() == PG_UTF8)
		pg_regex_strategy = pg_regex_locale ?
			PG_REGEX_LOCALE_WIDE_L : PG_REGEX_LOCALE_WIDE;
	else
#endif   /* USE_WIDE_UPPER_LOWER */
		pg_regex_strategy = pg_regex_locale ?
			PG_REGEX_LOCALE_1BYTE_L : PG_REGEX_LOCALE_1BYTE;

	pg_regex_collation = collation;
}
Ejemplo n.º 3
0
/*
 * _t_isprint --- is the character starting at *ptr printable?
 *
 * When lc_ctype_is_c() is true the first byte is tested with isprint().
 * Otherwise the input is converted with char2wchar() and the first resulting
 * wide character is tested with iswprint().
 */
int
_t_isprint(const char *ptr)
{
	wchar_t		wide[2];

	if (!lc_ctype_is_c())
	{
		char2wchar(wide, ptr, 1);
		return iswprint((wint_t) wide[0]);
	}

	return isprint(TOUCHAR(ptr));
}
Ejemplo n.º 4
0
/*
 * t_isprint --- is the (possibly multibyte) character at *ptr printable?
 *
 * A character that pg_mblen() reports as one byte long, or any character
 * when lc_ctype_is_c() is true, is tested with isprint() on its first byte.
 * Everything else is converted with char2wchar() and tested with iswprint().
 */
int
t_isprint(const char *ptr)
{
	wchar_t		wide[2];
	int			nbytes = pg_mblen(ptr);

	if (nbytes != 1 && !lc_ctype_is_c())
	{
		/* multibyte character in a non-C locale: go through wchar_t */
		char2wchar(wide, 2, ptr, nbytes);
		return iswprint((wint_t) wide[0]);
	}

	return isprint(TOUCHAR(ptr));
}
Ejemplo n.º 5
0
/*
 * t_isprint --- is the (possibly multibyte) character at *ptr printable?
 *
 * The collation and locale are placeholders for now (see the TODO markers):
 * the default collation OID and a zero locale are always used.  A character
 * that pg_mblen() reports as one byte long, or any character when
 * lc_ctype_is_c() accepts the collation, is tested with isprint() on its
 * first byte; anything else is converted with char2wchar() and tested with
 * iswprint().
 */
int
t_isprint(const char *ptr)
{
	pg_locale_t mylocale = 0;	/* TODO */
	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */
	wchar_t		wide[2];
	int			nbytes = pg_mblen(ptr);

	if (nbytes != 1 && !lc_ctype_is_c(collation))
	{
		/* multibyte character in a non-C locale: go through wchar_t */
		char2wchar(wide, 2, ptr, nbytes, mylocale);
		return iswprint((wint_t) wide[0]);
	}

	return isprint(TOUCHAR(ptr));
}
Ejemplo n.º 6
0
/*
 * char2wchar --- convert a multibyte string to wide characters
 *
 * "len" is passed both as the input length and as the output capacity to the
 * underlying conversion routine.  Returns the value produced by that routine,
 * or 0 immediately when len is 0.
 *
 * Three conversion paths exist:
 *	- WIN32 with a UTF8 database: MultiByteToWideChar(); a zero result is
 *	  reported with ereport(ERROR).
 *	- lc_ctype_is_c(): pg_mb2wchar_with_len().
 *	- otherwise: mbstowcs().
 */
size_t
char2wchar(wchar_t *to, const char *from, size_t len)
{
	if (len == 0)
		return 0;

#ifdef WIN32
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		int			nconv = MultiByteToWideChar(CP_UTF8, 0, from, len, to, len);

		if (nconv == 0)
		{
			/* let pg_verifymbstr look at the input before the generic error */
			pg_verifymbstr(from, strlen(from), false);
			ereport(ERROR,
					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
					 errmsg("invalid multibyte character for locale"),
					 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
		}

		Assert(nconv <= len);

		return nconv;
	}
#endif /* WIN32 */

	if (lc_ctype_is_c())
	{
		/*
		 * pg_mb2wchar_with_len always appends a trailing '\0', so the caller
		 * must have allocated 'to' with enough room for it.
		 */
		return pg_mb2wchar_with_len(from, (pg_wchar *) to, len);
	}

	return mbstowcs(to, from, len);
}
Ejemplo n.º 7
0
/*
 * wchar2char --- convert wide characters to multibyte format
 *
 * The API mirrors the standard wcstombs() function: tolen is the maximum
 * number of bytes that may be stored at *to, and *from must be
 * zero-terminated.  The output is zero-terminated iff there is room.
 *
 * Returns the number of bytes produced (not counting the terminator), 0 when
 * tolen is 0, or (size_t) -1 on failure.
 */
size_t
wchar2char(char *to, const wchar_t *from, size_t tolen)
{
	size_t		nbytes;

	if (tolen == 0)
		return 0;

#ifdef WIN32

	/*
	 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
	 * for some reason mbstowcs and wcstombs won't do this for us, so the
	 * Win32 conversion routine WideCharToMultiByte() is called directly.
	 */
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		nbytes = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
									 NULL, NULL);

		/* A zero return is failure */
		if (nbytes == 0)
			return (size_t) -1;

		Assert(nbytes <= tolen);

		/* Microsoft counts the zero terminator in the result */
		return nbytes - 1;
	}
#endif   /* WIN32 */

	Assert(!lc_ctype_is_c());
	nbytes = wcstombs(to, from, tolen);

	return nbytes;
}
Ejemplo n.º 8
0
/*
 * lowerstr_with_len --- produce a lower-cased copy of a string
 *
 * "str" does not have to be null-terminated; at most "len" bytes are read.
 *
 * The result is a freshly palloc'd, null-terminated string.
 */
char *
lowerstr_with_len(const char *str, int len)
{
	char	   *out;

#ifdef USE_WIDE_UPPER_LOWER
	pg_locale_t mylocale = 0;	/* TODO */
	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */
#endif

	if (len == 0)
		return pstrdup("");

#ifdef USE_WIDE_UPPER_LOWER

	/*
	 * The wide-character path is taken only when the encoding's maximum
	 * length exceeds 1 and ctype is not C.  Some operating systems fail with
	 * multi-byte encodings and a C locale, and a C locale does not need
	 * multibyte processing anyway.  (From backend/utils/adt/oracle_compat.c,
	 * Teodor.)
	 */
	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collation))
	{
		wchar_t    *wide;
		int			nchars;
		int			outsize;
		int			nbytes;
		int			i;

		/*
		 * Worst case is one wchar_t per input byte (bytes >= characters),
		 * plus one more for the terminating zero that wchar2char requires.
		 */
		wide = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));

		nchars = char2wchar(wide, len + 1, str, len, mylocale);
		Assert(nchars <= len);

		/* fold in place, stopping at the zero terminator */
		for (i = 0; wide[i]; i++)
			wide[i] = towlower((wint_t) wide[i]);

		/*
		 * Size the result for the worst case plus the trailing '\0'
		 */
		outsize = pg_database_encoding_max_length() * nchars + 1;
		out = (char *) palloc(outsize);

		nbytes = wchar2char(out, wide, outsize, mylocale);

		pfree(wide);

		if (nbytes < 0)
			ereport(ERROR,
					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
			errmsg("conversion from wchar_t to server encoding failed: %m")));
		Assert(nbytes < outsize);

		return out;
	}
#endif   /* USE_WIDE_UPPER_LOWER */

	{
		int			pos;

		/* byte-at-a-time path: stop at len bytes or at an embedded NUL */
		out = (char *) palloc(sizeof(char) * (len + 1));
		for (pos = 0; pos < len && str[pos]; pos++)
			out[pos] = tolower(TOUCHAR(&str[pos]));
		out[pos] = '\0';
	}

	return out;
}
Ejemplo n.º 9
0
/*
 * char2wchar --- convert multibyte characters to wide characters
 *
 * The API is nearly that of mbstowcs(), with two differences: *from does not
 * have to be null-terminated, because its length in bytes is given as
 * fromlen; and invalid input encoding is reported through ereport() instead
 * of a -1 return.  tolen is the maximum number of wchar_t's that may be
 * stored at *to.  The output is zero-terminated iff there is room.
 */
size_t
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
{
	size_t		nconv;

	if (tolen == 0)
		return 0;

#ifdef WIN32
	/* See WIN32 "Unicode" comment above */
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		/* Win32 API does not work for zero-length input */
		nconv = 0;
		if (fromlen != 0)
		{
			nconv = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
			/* A zero return is failure */
			if (nconv == 0)
				nconv = (size_t) -1;
		}

		if (nconv != (size_t) -1)
		{
			Assert(nconv < tolen);
			/* Append trailing null wchar (MultiByteToWideChar() does not) */
			to[nconv] = 0;
		}
	}
	else
#endif   /* WIN32 */
	{
		/* mbstowcs requires ending '\0', so work on a terminated copy */
		char	   *tmp = pnstrdup(from, fromlen);

		Assert(!lc_ctype_is_c());
		nconv = mbstowcs(to, tmp, tolen);
		pfree(tmp);
	}

	/* the common case: conversion succeeded */
	if (nconv != (size_t) -1)
		return nconv;

	/*
	 * An invalid multibyte character was hit.  To produce a useful message,
	 * pg_verifymbstr gets a chance to examine the string first.  The string
	 * may look fine to us yet not to mbstowcs, which suggests that the
	 * LC_CTYPE locale differs from the database encoding; in that case a
	 * generic error is raised below.
	 */
	pg_verifymbstr(from, fromlen, false);	/* might not return */
	/* but if it does ... */
	ereport(ERROR,
			(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
			 errmsg("invalid multibyte character for locale"),
			 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));

	return nconv;
}
Ejemplo n.º 10
0
/*
 * Generic_Text_IC_like --- case-insensitive LIKE match of str against pat
 *
 * For efficiency, the single-byte case does not call lower() on the pattern
 * and text; SB_lower_char is applied to each character instead.  In the
 * multi-byte case there is not much choice: both inputs are run through
 * lower() first and then matched.
 *
 * Returns whatever the selected matcher (SB_IMatchText, UTF8_MatchText or
 * MB_MatchText) returns.  Raises ERRCODE_INDETERMINATE_COLLATION in the
 * single-byte case when the collation is not C, not the default, and not a
 * valid OID.
 */
static inline int
Generic_Text_IC_like(text *str, text *pat, Oid collation)
{
	char	   *s,
			   *p;
	int			slen,
				plen;

	if (pg_database_encoding_max_length() <= 1)
	{
		/*
		 * Set up the locale information SB_lower_char needs.  This should
		 * match the methods used in str_tolower().
		 */
		pg_locale_t locale = 0;
		bool		locale_is_c = lc_ctype_is_c(collation);

		if (!locale_is_c && collation != DEFAULT_COLLATION_OID)
		{
			if (!OidIsValid(collation))
			{
				/*
				 * Usually this means the parser was unable to resolve a
				 * conflict between implicit collations, so word the report
				 * accordingly.
				 */
				ereport(ERROR,
						(errcode(ERRCODE_INDETERMINATE_COLLATION),
						 errmsg("could not determine which collation to use for ILIKE"),
						 errhint("Use the COLLATE clause to set the collation explicitly.")));
			}
			locale = pg_newlocale_from_collation(collation);
		}

		p = VARDATA_ANY(pat);
		plen = VARSIZE_ANY_EXHDR(pat);
		s = VARDATA_ANY(str);
		slen = VARSIZE_ANY_EXHDR(str);

		return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
	}

	/* lower's result is never packed, so OK to use old macros here */
	pat = DatumGetTextP(DirectFunctionCall1Coll(lower, collation,
												PointerGetDatum(pat)));
	p = VARDATA(pat);
	plen = (VARSIZE(pat) - VARHDRSZ);

	str = DatumGetTextP(DirectFunctionCall1Coll(lower, collation,
												PointerGetDatum(str)));
	s = VARDATA(str);
	slen = (VARSIZE(str) - VARHDRSZ);

	if (GetDatabaseEncoding() != PG_UTF8)
		return MB_MatchText(s, slen, p, plen, 0, true);

	return UTF8_MatchText(s, slen, p, plen, 0, true);
}
Ejemplo n.º 11
0
/*
 * lowerstr --- produce a lower-cased copy of a null-terminated string
 *
 * The result is a freshly palloc'd, null-terminated string.  Conversion
 * failures in the wide-character path are reported with ereport(ERROR).
 */
char *
lowerstr(char *str)
{
	char	   *out;
	int			len = strlen(str);

	if (len == 0)
		return pstrdup("");

#ifdef TS_USE_WIDE

	/*
	 * The wide-character path is taken only when the encoding's maximum
	 * length exceeds 1 and ctype is not C.  Some operating systems fail with
	 * multi-byte encodings and a C locale, and a C locale does not need
	 * multibyte processing anyway.  (From backend/utils/adt/oracle_compat.c,
	 * Teodor.)
	 */
	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
	{
		wchar_t    *wide;
		int			nchars;
		int			outsize;
		int			nbytes;
		int			i;

		/*
		 * Worst case is one wchar_t per input byte (the byte count is never
		 * smaller than the character count), plus one wchar_t for the zero
		 * terminator that wchar2char (really wcstombs) expects.
		 */
		wide = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));

		/*
		 * str is expected to be a C string, so the result is the number of
		 * converted characters.
		 */
		nchars = char2wchar(wide, str, len);
		if (nchars < 0)
			ereport(ERROR,
					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
					 errmsg("translation failed from server encoding to wchar_t")));

		Assert(nchars <= len);
		wide[nchars] = 0;

		/* fold in place, stopping at the zero terminator */
		for (i = 0; wide[i]; i++)
			wide[i] = towlower((wint_t) wide[i]);

		/*
		 * Size the result for the worst case, terminator included
		 */
		outsize = sizeof(char) * pg_database_encoding_max_length() * (nchars + 1);
		out = (char *) palloc(outsize);

		/*
		 * The value returned here is a byte count, which is never smaller
		 * than the character count.
		 */
		nbytes = wchar2char(out, wide, outsize);
		pfree(wide);

		if (nbytes < 0)
			ereport(ERROR,
					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
					 errmsg("translation failed from wchar_t to server encoding %d", errno)));
		Assert(nbytes <= outsize);
		out[nbytes] = '\0';

		return out;
	}
#endif

	{
		int			pos;

		/* byte-at-a-time path: copy up to the terminating NUL */
		out = (char *) palloc(sizeof(char) * (len + 1));
		for (pos = 0; str[pos]; pos++)
			out[pos] = tolower((unsigned char) str[pos]);
		out[pos] = '\0';
	}

	return out;
}