/* * normalize_expr - Normalize statements or expressions. * * Mask constants, strip unnecessary whitespaces and upcase keywords. expr is * modified in-place (destructively). If readability is more important than * uniqueness, preserve_space puts one space for one existent whitespace for * more readability. */ void normalize_expr(char *expr, bool preserve_space) { core_yyscan_t yyscanner; core_yy_extra_type yyextra; core_YYSTYPE yylval; YYLTYPE yylloc; YYLTYPE lastloc; YYLTYPE start; char *wp; int tok, lasttok; wp = expr; yyscanner = scanner_init(expr, &yyextra, ScanKeywords, NumScanKeywords); lasttok = 0; lastloc = -1; for (;;) { tok = norm_yylex(expr, &yylval, &yylloc, yyscanner); start = yylloc; if (lastloc >= 0) { int i, i2; /* Skipping preceding whitespaces */ for(i = lastloc ; i < start && IS_WSCHAR(expr[i]) ; i++); /* Searching for trailing whitespace */ for(i2 = i; i2 < start && !IS_WSCHAR(expr[i2]) ; i2++); if (lasttok == IDENT) { /* Identifiers are copied in case-sensitive manner. */ memcpy(wp, expr + i, i2 - i); wp += i2 - i; } else { /* Upcase keywords */ char *sp; for (sp = expr + i ; sp < expr + i2 ; sp++, wp++) *wp = (*sp >= 'a' && *sp <= 'z' ? *sp - ('a' - 'A') : *sp); } /* * Because of destructive writing, wp must not go advance the * reading point. * Although this function's output does not need any validity as a * statement or an expression, spaces are added where it should be * to keep some extent of sanity. If readability is more important * than uniqueness, preserve_space adds one space for each * existent whitespace. */ if (tok > 0 && i2 < start && (preserve_space || (tok >= IDENT && lasttok >= IDENT && !IS_CONST(tok) && !IS_CONST(lasttok)))) *wp++ = ' '; start = i2; } /* Exit on parse error. */ if (tok < 0) { *wp = 0; return; } /* * Negative signs before numbers are tokenized separately. And * explicit positive signs won't appear in deparsed expressions. */ if (tok == '-') tok = norm_yylex(expr, &yylval, &yylloc, yyscanner); /* Exit on parse error. */ if (tok < 0) { *wp = 0; return; } if (IS_CONST(tok)) { YYLTYPE end; tok = norm_yylex(expr, &yylval, &end, yyscanner); /* Exit on parse error. */ if (tok < 0) { *wp = 0; return; } /* * Negative values may be surrounded with parens by the * deparser. Mask involving them. */ if (lasttok == '(' && tok == ')') { wp -= (start - lastloc); start = lastloc; end++; } while (expr[end - 1] == ' ') end--; *wp++ = '?'; yylloc = end; } if (tok == 0) break; lasttok = tok; lastloc = yylloc; } *wp = 0; }
void normalize_expr(char *expr, bool preserve_space) { core_yyscan_t yyscanner; core_yy_extra_type yyextra; core_YYSTYPE yylval; YYLTYPE yylloc; YYLTYPE lastloc; YYLTYPE start; char *wp; int tok, lasttok; wp = expr; yyscanner = scanner_init(expr, &yyextra, &ScanKeywords, ScanKeywordTokens); /* * The warnings about nonstandard escape strings is already emitted in the * core. Just silence them here. */ #if PG_VERSION_NUM >= 90500 yyextra.escape_string_warning = false; #endif lasttok = 0; lastloc = -1; for (;;) { tok = norm_yylex(expr, &yylval, &yylloc, yyscanner); start = yylloc; if (lastloc >= 0) { int i, i2; /* Skipping preceding whitespaces */ for(i = lastloc ; i < start && IS_WSCHAR(expr[i]) ; i++); /* Searching for trailing whitespace */ for(i2 = i; i2 < start && !IS_WSCHAR(expr[i2]) ; i2++); if (lasttok == IDENT) { /* Identifiers are copied in case-sensitive manner. */ memcpy(wp, expr + i, i2 - i); wp += i2 - i; } #if PG_VERSION_NUM >= 100000 /* * Since PG10 pg_stat_statements doesn't store trailing semicolon * in the column "query". Normalization is basically useless in the * version but still usefull to match utility commands so follow * the behavior change. */ else if (lasttok == ';') { /* Just do nothing */ } #endif else { /* Upcase keywords */ char *sp; for (sp = expr + i ; sp < expr + i2 ; sp++, wp++) *wp = (*sp >= 'a' && *sp <= 'z' ? *sp - ('a' - 'A') : *sp); } /* * Because of destructive writing, wp must not go advance the * reading point. * Although this function's output does not need any validity as a * statement or an expression, spaces are added where it should be * to keep some extent of sanity. If readability is more important * than uniqueness, preserve_space adds one space for each * existent whitespace. */ if (tok > 0 && i2 < start && (preserve_space || (tok >= IDENT && lasttok >= IDENT && !IS_CONST(tok) && !IS_CONST(lasttok)))) *wp++ = ' '; start = i2; } /* Exit on parse error. */ if (tok < 0) { *wp = 0; return; } /* * Negative signs before numbers are tokenized separately. And * explicit positive signs won't appear in deparsed expressions. */ if (tok == '-') tok = norm_yylex(expr, &yylval, &yylloc, yyscanner); /* Exit on parse error. */ if (tok < 0) { *wp = 0; return; } if (IS_CONST(tok)) { YYLTYPE end; tok = norm_yylex(expr, &yylval, &end, yyscanner); /* Exit on parse error. */ if (tok < 0) { *wp = 0; return; } /* * Negative values may be surrounded with parens by the * deparser. Mask involving them. */ if (lasttok == '(' && tok == ')') { wp -= (start - lastloc); start = lastloc; end++; } while (expr[end - 1] == ' ') end--; *wp++ = '?'; yylloc = end; } if (tok == 0) break; lasttok = tok; lastloc = yylloc; } *wp = 0; }