/*
 * normalize_expr - Normalize a statement or an expression in place.
 *
 * expr is rewritten destructively:
 *   - every constant token is replaced by a single '?',
 *   - whitespace between tokens is dropped, except where a separator is kept
 *     (see below),
 *   - ASCII lowercase letters of every token other than an identifier are
 *     upcased; identifiers are copied unchanged.
 *
 * preserve_space: when true, one space is emitted wherever the input had
 * whitespace between two tokens, trading uniqueness of the result for
 * readability.  When false, a space is emitted only between two non-constant
 * tokens whose codes are both >= IDENT.
 *
 * The result is always NUL-terminated.  If the lexer reports an error
 * (negative token), the output is terminated at the current write position
 * and the function returns early.
 *
 * The output is written over the input, so the code relies on the write
 * pointer (wp) never passing the position being read.
 * NOTE(review): this presumes the scanner works on its own copy of expr --
 * confirm against scanner_init().
 */
void
normalize_expr(char *expr, bool preserve_space)
{
	core_yyscan_t yyscanner;
	core_yy_extra_type yyextra;
	core_YYSTYPE yylval;
	YYLTYPE		yylloc;
	YYLTYPE		lastloc;
	YYLTYPE		start;
	char	   *wp;
	int			tok,
				lasttok;

	wp = expr;
	yyscanner = scanner_init(expr,
							 &yyextra,
							 ScanKeywords,
							 NumScanKeywords);

	/* No previous token yet; lastloc < 0 suppresses the copy step below. */
	lasttok = 0;
	lastloc = -1;

	for (;;)
	{
		tok = norm_yylex(expr, &yylval, &yylloc, yyscanner);

		start = yylloc;

		/*
		 * Emit the text of the previous token, which lies between lastloc and
		 * the location of the token just read.
		 */
		if (lastloc >= 0)
		{
			int			i,
						i2;

			/* Skip whitespace preceding the previous token's text. */
			for (i = lastloc; i < start && IS_WSCHAR(expr[i]); i++)
				;

			/* Find the end of that text: next whitespace or current token. */
			for (i2 = i; i2 < start && !IS_WSCHAR(expr[i2]); i2++)
				;

			if (lasttok == IDENT)
			{
				/*
				 * Identifiers are copied in a case-sensitive manner.  Source
				 * and destination are both inside expr and may overlap (wp
				 * trails the read position by an arbitrary amount, possibly
				 * zero), so memmove is required; memcpy would be undefined
				 * behaviour here.
				 */
				memmove(wp, expr + i, i2 - i);
				wp += i2 - i;
			}
			else
			{
				/* Upcase keywords (ASCII letters only). */
				char	   *sp;

				for (sp = expr + i; sp < expr + i2; sp++, wp++)
					*wp = (*sp >= 'a' && *sp <= 'z' ?
						   *sp - ('a' - 'A') : *sp);
			}

			/*
			 * Because of destructive writing, wp must not advance past the
			 * reading point.
			 *
			 * Although this function's output does not need to be valid as a
			 * statement or an expression, spaces are added where they should
			 * be to keep some extent of sanity.  A space is written only if
			 * the input had whitespace here (i2 < start) and the current
			 * token is not end-of-input or an error (tok > 0).  If
			 * readability is more important than uniqueness, preserve_space
			 * adds one space for each such gap; otherwise a space is kept
			 * only between two non-constant tokens with codes >= IDENT.
			 */
			if (tok > 0 &&
				i2 < start &&
				(preserve_space ||
				 (tok >= IDENT && lasttok >= IDENT &&
				  !IS_CONST(tok) && !IS_CONST(lasttok))))
				*wp++ = ' ';

			start = i2;
		}

		/* Exit on parse error. */
		if (tok < 0)
		{
			*wp = 0;
			return;
		}

		/*
		 * Negative signs before numbers are tokenized separately, and
		 * explicit positive signs won't appear in deparsed expressions.
		 * Read the token following the '-' so that a signed constant is
		 * masked as a whole.  yylloc now refers to that following token, so
		 * the text of the '-' itself is not copied to the output.
		 */
		if (tok == '-')
			tok = norm_yylex(expr, &yylval, &yylloc, yyscanner);

		/* Exit on parse error. */
		if (tok < 0)
		{
			*wp = 0;
			return;
		}

		if (IS_CONST(tok))
		{
			YYLTYPE		end;

			/* Look ahead one token; its location marks the constant's end. */
			tok = norm_yylex(expr, &yylval, &end, yyscanner);

			/* Exit on parse error. */
			if (tok < 0)
			{
				*wp = 0;
				return;
			}

			/*
			 * Negative values may be surrounded with parens by the deparser.
			 * Mask them together with the constant: rewind wp over what was
			 * emitted since the '(' and step end past the ')'.
			 */
			if (lasttok == '(' && tok == ')')
			{
				wp -= (start - lastloc);
				start = lastloc;
				end++;
			}

			/* Do not count spaces trailing the constant as part of it. */
			while (expr[end - 1] == ' ')
				end--;

			*wp++ = '?';
			yylloc = end;
		}

		/* End of input. */
		if (tok == 0)
			break;

		lasttok = tok;
		lastloc = yylloc;
	}
	*wp = 0;
}
/* Example no. 2 */
/*
 * normalize_expr - Normalize a statement or an expression in place.
 *
 * expr is rewritten destructively:
 *   - every constant token is replaced by a single '?',
 *   - whitespace between tokens is dropped, except where a separator is kept
 *     (see below),
 *   - ASCII lowercase letters of every token other than an identifier are
 *     upcased; identifiers are copied unchanged,
 *   - when built with PG_VERSION_NUM >= 100000, the text of a ';' token is
 *     not copied to the output.
 *
 * preserve_space: when true, one space is emitted wherever the input had
 * whitespace between two tokens, trading uniqueness of the result for
 * readability.  When false, a space is emitted only between two non-constant
 * tokens whose codes are both >= IDENT.
 *
 * The result is always NUL-terminated.  If the lexer reports an error
 * (negative token), the output is terminated at the current write position
 * and the function returns early.
 *
 * The output is written over the input, so the code relies on the write
 * pointer (wp) never passing the position being read.
 * NOTE(review): this presumes the scanner works on its own copy of expr --
 * confirm against scanner_init().
 */
void
normalize_expr(char *expr, bool preserve_space)
{
	core_yyscan_t yyscanner;
	core_yy_extra_type yyextra;
	core_YYSTYPE yylval;
	YYLTYPE		yylloc;
	YYLTYPE		lastloc;
	YYLTYPE		start;
	char	   *wp;
	int			tok,
				lasttok;

	wp = expr;
	yyscanner = scanner_init(expr,
							 &yyextra,
							 &ScanKeywords,
							 ScanKeywordTokens);

	/*
	 * The warnings about nonstandard escape strings are already emitted in
	 * the core. Just silence them here.
	 */
#if PG_VERSION_NUM >= 90500
	yyextra.escape_string_warning = false;
#endif

	/* No previous token yet; lastloc < 0 suppresses the copy step below. */
	lasttok = 0;
	lastloc = -1;

	for (;;)
	{
		tok = norm_yylex(expr, &yylval, &yylloc, yyscanner);

		start = yylloc;

		/*
		 * Emit the text of the previous token, which lies between lastloc and
		 * the location of the token just read.
		 */
		if (lastloc >= 0)
		{
			int			i,
						i2;

			/* Skip whitespace preceding the previous token's text. */
			for (i = lastloc; i < start && IS_WSCHAR(expr[i]); i++)
				;

			/* Find the end of that text: next whitespace or current token. */
			for (i2 = i; i2 < start && !IS_WSCHAR(expr[i2]); i2++)
				;

			if (lasttok == IDENT)
			{
				/*
				 * Identifiers are copied in a case-sensitive manner.  Source
				 * and destination are both inside expr and may overlap (wp
				 * trails the read position by an arbitrary amount, possibly
				 * zero), so memmove is required; memcpy would be undefined
				 * behaviour here.
				 */
				memmove(wp, expr + i, i2 - i);
				wp += i2 - i;
			}
#if PG_VERSION_NUM >= 100000
			/*
			 * Since PG10 pg_stat_statements doesn't store the trailing
			 * semicolon in the column "query". Normalization is basically
			 * useless in that version but still useful to match utility
			 * commands, so follow the behavior change.
			 */
			else if (lasttok == ';')
			{
				/* Just do nothing */
			}
#endif
			else
			{
				/* Upcase keywords (ASCII letters only). */
				char	   *sp;

				for (sp = expr + i; sp < expr + i2; sp++, wp++)
					*wp = (*sp >= 'a' && *sp <= 'z' ?
						   *sp - ('a' - 'A') : *sp);
			}

			/*
			 * Because of destructive writing, wp must not advance past the
			 * reading point.
			 *
			 * Although this function's output does not need to be valid as a
			 * statement or an expression, spaces are added where they should
			 * be to keep some extent of sanity.  A space is written only if
			 * the input had whitespace here (i2 < start) and the current
			 * token is not end-of-input or an error (tok > 0).  If
			 * readability is more important than uniqueness, preserve_space
			 * adds one space for each such gap; otherwise a space is kept
			 * only between two non-constant tokens with codes >= IDENT.
			 */
			if (tok > 0 &&
				i2 < start &&
				(preserve_space ||
				 (tok >= IDENT && lasttok >= IDENT &&
				  !IS_CONST(tok) && !IS_CONST(lasttok))))
				*wp++ = ' ';

			start = i2;
		}

		/* Exit on parse error. */
		if (tok < 0)
		{
			*wp = 0;
			return;
		}

		/*
		 * Negative signs before numbers are tokenized separately, and
		 * explicit positive signs won't appear in deparsed expressions.
		 * Read the token following the '-' so that a signed constant is
		 * masked as a whole.  yylloc now refers to that following token, so
		 * the text of the '-' itself is not copied to the output.
		 */
		if (tok == '-')
			tok = norm_yylex(expr, &yylval, &yylloc, yyscanner);

		/* Exit on parse error. */
		if (tok < 0)
		{
			*wp = 0;
			return;
		}

		if (IS_CONST(tok))
		{
			YYLTYPE		end;

			/* Look ahead one token; its location marks the constant's end. */
			tok = norm_yylex(expr, &yylval, &end, yyscanner);

			/* Exit on parse error. */
			if (tok < 0)
			{
				*wp = 0;
				return;
			}

			/*
			 * Negative values may be surrounded with parens by the deparser.
			 * Mask them together with the constant: rewind wp over what was
			 * emitted since the '(' and step end past the ')'.
			 */
			if (lasttok == '(' && tok == ')')
			{
				wp -= (start - lastloc);
				start = lastloc;
				end++;
			}

			/* Do not count spaces trailing the constant as part of it. */
			while (expr[end - 1] == ' ')
				end--;

			*wp++ = '?';
			yylloc = end;
		}

		/* End of input. */
		if (tok == 0)
			break;

		lasttok = tok;
		lastloc = yylloc;
	}
	*wp = 0;
}