From 8ee2a422ec86a055ccddd26f397760d372aec4a8 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 30 Dec 2021 10:26:37 +0100 Subject: [PATCH v6 5/7] Reject trailing junk after numeric literals After this, the PostgreSQL lexers no longer accept numeric literals with trailing non-digits, such as 123abc, which were previously scanned as two tokens: 123 and abc. That behavior was undocumented and surprising, and it might also interfere with some extended numeric literal syntax being contemplated for the future. Discussion: https://www.postgresql.org/message-id/flat/b239564c-cad0-b23e-c57e-166d883cb97d@enterprisedb.com --- src/backend/parser/scan.l | 27 ++++++---- src/fe_utils/psqlscan.l | 21 +++++--- src/interfaces/ecpg/preproc/pgc.l | 4 ++ src/test/regress/expected/numerology.out | 68 +++++++++--------------- 4 files changed, 61 insertions(+), 59 deletions(-) diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 76fd6996ed..f889c2faf7 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -399,6 +399,10 @@ real ({integer}|{decimal})[Ee][-+]?{digit}+ realfail1 ({integer}|{decimal})[Ee] realfail2 ({integer}|{decimal})[Ee][-+] +integer_junk {integer}{ident_start} +decimal_junk {decimal}{ident_start} +real_junk {real}{ident_start} + param \${integer} other . @@ -996,19 +1000,24 @@ other . return FCONST; } {realfail1} { - /* - * throw back the [Ee], and figure out whether what - * remains is an {integer} or {decimal}. 
- */ - yyless(yyleng - 1); SET_YYLLOC(); - return process_integer_literal(yytext, yylval); + yyerror("trailing junk after numeric literal"); } {realfail2} { - /* throw back the [Ee][+-], and proceed as above */ - yyless(yyleng - 2); SET_YYLLOC(); - return process_integer_literal(yytext, yylval); + yyerror("trailing junk after numeric literal"); + } +{integer_junk} { + SET_YYLLOC(); + yyerror("trailing junk after numeric literal"); + } +{decimal_junk} { + SET_YYLLOC(); + yyerror("trailing junk after numeric literal"); + } +{real_junk} { + SET_YYLLOC(); + yyerror("trailing junk after numeric literal"); } diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l index db8a8dfaf2..09709e6151 100644 --- a/src/fe_utils/psqlscan.l +++ b/src/fe_utils/psqlscan.l @@ -337,6 +337,10 @@ real ({integer}|{decimal})[Ee][-+]?{digit}+ realfail1 ({integer}|{decimal})[Ee] realfail2 ({integer}|{decimal})[Ee][-+] +integer_junk {integer}{ident_start} +decimal_junk {decimal}{ident_start} +real_junk {real}{ident_start} + param \${integer} /* psql-specific: characters allowed in variable names */ @@ -855,17 +859,18 @@ other . ECHO; } {realfail1} { - /* - * throw back the [Ee], and figure out whether what - * remains is an {integer} or {decimal}. - * (in psql, we don't actually care...) 
- */ - yyless(yyleng - 1); ECHO; } {realfail2} { - /* throw back the [Ee][+-], and proceed as above */ - yyless(yyleng - 2); + ECHO; + } +{integer_junk} { + ECHO; + } +{decimal_junk} { + ECHO; + } +{real_junk} { ECHO; } diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l index a2f8c7f3d8..110478059b 100644 --- a/src/interfaces/ecpg/preproc/pgc.l +++ b/src/interfaces/ecpg/preproc/pgc.l @@ -365,6 +365,10 @@ real ({integer}|{decimal})[Ee][-+]?{digit}+ realfail1 ({integer}|{decimal})[Ee] realfail2 ({integer}|{decimal})[Ee][-+] +integer_junk {integer}{ident_start} +decimal_junk {decimal}{ident_start} +real_junk {real}{ident_start} + param \${integer} /* special characters for other dbms */ diff --git a/src/test/regress/expected/numerology.out b/src/test/regress/expected/numerology.out index 32c6d80c03..2f176ccb52 100644 --- a/src/test/regress/expected/numerology.out +++ b/src/test/regress/expected/numerology.out @@ -6,57 +6,41 @@ -- Trailing junk in numeric literals -- SELECT 123abc; - abc ------ - 123 -(1 row) - +ERROR: trailing junk after numeric literal at or near "123a" +LINE 1: SELECT 123abc; + ^ SELECT 0x0o; - x0o ------ - 0 -(1 row) - +ERROR: trailing junk after numeric literal at or near "0x" +LINE 1: SELECT 0x0o; + ^ SELECT 1_2_3; - _2_3 ------- - 1 -(1 row) - +ERROR: trailing junk after numeric literal at or near "1_" +LINE 1: SELECT 1_2_3; + ^ SELECT 0.a; - a ---- - 0 -(1 row) - +ERROR: trailing junk after numeric literal at or near "0.a" +LINE 1: SELECT 0.a; + ^ SELECT 0.0a; - a ------ - 0.0 -(1 row) - +ERROR: trailing junk after numeric literal at or near "0.0a" +LINE 1: SELECT 0.0a; + ^ SELECT .0a; - a ------ - 0.0 -(1 row) - +ERROR: trailing junk after numeric literal at or near ".0a" +LINE 1: SELECT .0a; + ^ SELECT 0.0e1a; - a ---- - 0 -(1 row) - +ERROR: trailing junk after numeric literal at or near "0.0e1a" +LINE 1: SELECT 0.0e1a; + ^ SELECT 0.0e; - e ------ - 0.0 -(1 row) - +ERROR: trailing junk after numeric literal at 
or near "0.0e" +LINE 1: SELECT 0.0e; + ^ SELECT 0.0e+a; -ERROR: syntax error at or near "+" +ERROR: trailing junk after numeric literal at or near "0.0e+" LINE 1: SELECT 0.0e+a; - ^ + ^ -- -- Test implicit type conversions -- This fails for Postgres v6.1 (and earlier?) -- 2.34.1