From 6e081c44c04201ee9ded9dc6b689824ccabdfc28 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter@eisentraut.org>
Date: Sun, 31 Oct 2021 15:42:18 +0100
Subject: [PATCH v4] Non-decimal integer literals

Add support for hexadecimal, octal, and binary integer literals:

    0x42F
    0o273
    0b100101

per SQL:202x draft.

This adds support in the lexer as well as in the integer type input
functions.

Discussion: https://www.postgresql.org/message-id/flat/b239564c-cad0-b23e-c57e-166d883cb97d@enterprisedb.com
---
 doc/src/sgml/syntax.sgml                   |  26 ++++++
 src/backend/catalog/information_schema.sql |   6 +-
 src/backend/catalog/sql_features.txt       |   1 +
 src/backend/parser/scan.l                  | 103 ++++++++++++++++-----
 src/backend/utils/adt/int8.c               |  54 +++++++++++
 src/backend/utils/adt/numutils.c           |  97 +++++++++++++++++++
 src/fe_utils/psqlscan.l                    |  81 ++++++++++++----
 src/interfaces/ecpg/preproc/pgc.l          |  95 +++++++++++--------
 src/test/regress/expected/int2.out         |  19 ++++
 src/test/regress/expected/int4.out         |  75 +++++++++++++++
 src/test/regress/expected/int8.out         |  19 ++++
 src/test/regress/sql/int2.sql              |   7 ++
 src/test/regress/sql/int4.sql              |  26 ++++++
 src/test/regress/sql/int8.sql              |   7 ++
 14 files changed, 531 insertions(+), 85 deletions(-)
diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml
index d66560b587..a4f04199c6 100644
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@@ -694,6 +694,32 @@ <title>Numeric Constants</title>
 </literallayout>
     </para>
 
+    <para>
+     Additionally, non-decimal integer constants can be used in these forms:
+<synopsis>
+0x<replaceable>hexdigits</replaceable>
+0o<replaceable>octdigits</replaceable>
+0b<replaceable>bindigits</replaceable>
+</synopsis>
+     <replaceable>hexdigits</replaceable> is one or more hexadecimal digits
+     (0-9, A-F), <replaceable>octdigits</replaceable> is one or more octal
+     digits (0-7), <replaceable>bindigits</replaceable> is one or more binary
+     digits (0 or 1).  Hexadecimal digits and the radix prefixes can be in
+     upper or lower case.  Note that only integers can have non-decimal forms,
+     not numbers with fractional parts.
+    </para>
+
+    <para>
+     Some examples of valid non-decimal integer constants:
+<literallayout>0b100101
+0B10011001
+0o273
+0O755
+0x42f
+0XFFFF
+</literallayout>
+    </para>
+
     <para>
      <indexterm><primary>integer</primary></indexterm>
      <indexterm><primary>bigint</primary></indexterm>
diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql
index 11d9dd60c2..ce88c483a2 100644
--- a/src/backend/catalog/information_schema.sql
+++ b/src/backend/catalog/information_schema.sql
@@ -119,7 +119,7 @@ CREATE FUNCTION _pg_numeric_precision(typid oid, typmod int4) RETURNS integer
          WHEN 1700 /*numeric*/ THEN
               CASE WHEN $2 = -1
                    THEN null
-                   ELSE (($2 - 4) >> 16) & 65535
+                   ELSE (($2 - 4) >> 16) & 0xFFFF
                    END
          WHEN 700 /*float4*/ THEN 24 /*FLT_MANT_DIG*/
          WHEN 701 /*float8*/ THEN 53 /*DBL_MANT_DIG*/
@@ -147,7 +147,7 @@ CREATE FUNCTION _pg_numeric_scale(typid oid, typmod int4) RETURNS integer
        WHEN $1 IN (1700) THEN
             CASE WHEN $2 = -1
                  THEN null
-                 ELSE ($2 - 4) & 65535
+                 ELSE ($2 - 4) & 0xFFFF
                  END
        ELSE null
   END;
@@ -163,7 +163,7 @@ CREATE FUNCTION _pg_datetime_precision(typid oid, typmod int4) RETURNS integer
        WHEN $1 IN (1083, 1114, 1184, 1266) /* time, timestamp, same + tz */
            THEN CASE WHEN $2 < 0 THEN 6 ELSE $2 END
        WHEN $1 IN (1186) /* interval */
-           THEN CASE WHEN $2 < 0 OR $2 & 65535 = 65535 THEN 6 ELSE $2 & 65535 END
+           THEN CASE WHEN $2 < 0 OR $2 & 0xFFFF = 0xFFFF THEN 6 ELSE $2 & 0xFFFF END
        ELSE null
   END;
 
diff --git a/src/backend/catalog/sql_features.txt b/src/backend/catalog/sql_features.txt
index 9f424216e2..d6359503f3 100644
--- a/src/backend/catalog/sql_features.txt
+++ b/src/backend/catalog/sql_features.txt
@@ -526,6 +526,7 @@ T652	SQL-dynamic statements in SQL routines			NO
 T653	SQL-schema statements in external routines			YES	
 T654	SQL-dynamic statements in external routines			NO	
 T655	Cyclically dependent routines			YES	
+T661	Non-decimal integer literals			YES	SQL:202x draft
 T811	Basic SQL/JSON constructor functions			NO	
 T812	SQL/JSON: JSON_OBJECTAGG			NO	
 T813	SQL/JSON: JSON_ARRAYAGG with ORDER BY			NO	
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 6e6824faeb..fe5ddbe2aa 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -124,7 +124,7 @@ static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner);
 static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
 static char *litbufdup(core_yyscan_t yyscanner);
 static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner);
-static int	process_integer_literal(const char *token, YYSTYPE *lval);
+static int	process_integer_literal(const char *token, YYSTYPE *lval, int base);
 static void addunicode(pg_wchar c, yyscan_t yyscanner);
 
 #define yyerror(msg)  scanner_yyerror(msg, yyscanner)
@@ -262,7 +262,7 @@ quotecontinuefail	{whitespace}*"-"?
 xbstart			[bB]{quote}
 xbinside		[^']*
 
-/* Hexadecimal number */
+/* Hexadecimal byte string */
 xhstart			[xX]{quote}
 xhinside		[^']*
 
@@ -341,7 +341,7 @@ xcstart			\/\*{op_chars}*
 xcstop			\*+\/
 xcinside		[^*/]+
 
-digit			[0-9]
+
 ident_start		[A-Za-z\200-\377_]
 ident_cont		[A-Za-z\200-\377_0-9\$]
 
@@ -380,24 +380,44 @@ self			[,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 op_chars		[\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 operator		{op_chars}+
 
-/* we no longer allow unary minus in numbers.
- * instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999
+/*
+ * Numbers
+ *
+ * Unary minus is not part of a number here.  Instead we pass it separately to
+ * the parser, where it gets coerced via doNegate().
  *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
  *
  * {realfail1} and {realfail2} are added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
+decdigit		[0-9]
+hexdigit		[0-9A-Fa-f]
+octdigit		[0-7]
+bindigit		[0-1]
+
+decinteger		{decdigit}+
+hexinteger		0[xX]{hexdigit}+
+octinteger		0[oO]{octdigit}+
+bininteger		0[bB]{bindigit}+
+
+hexfail0		0[xX]
+octfail0		0[oO]
+binfail0		0[bB]
 
-integer			{digit}+
-decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail		{digit}+\.\.
-real			({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1		({integer}|{decimal})[Ee]
-realfail2		({integer}|{decimal})[Ee][-+]
+decfail         {decinteger}{ident_start}
+hexfail			{hexinteger}{ident_start}
+octfail			{octinteger}{ident_start}
+binfail			{bininteger}{ident_start}
 
-param			\${integer}
+numeric			(({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail		{decdigit}+\.\.
+
+real			({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1		({decinteger}|{numeric})[Ee]
+realfail2		({decinteger}|{numeric})[Ee][-+]
+
+param			\${decinteger}
 
 other			.
 
@@ -973,20 +993,53 @@ other			.
 					return PARAM;
 				}
 
-{integer}		{
+{decinteger}	{
+					SET_YYLLOC();
+					return process_integer_literal(yytext, yylval, 10);
+				}
+{hexinteger}	{
+					SET_YYLLOC();
+					return process_integer_literal(yytext + 2, yylval, 16);
+				}
+{octinteger}	{
+					SET_YYLLOC();
+					return process_integer_literal(yytext + 2, yylval, 8);
+				}
+{bininteger}	{
 					SET_YYLLOC();
-					return process_integer_literal(yytext, yylval);
+					return process_integer_literal(yytext + 2, yylval, 2);
+				}
+{hexfail0}		{
+					yyerror("invalid hexadecimal integer");
+				}
+{octfail0}		{
+					yyerror("invalid octal integer");
+				}
+{binfail0}		{
+					yyerror("invalid binary integer");
+				}
+{decfail}		{
+					yyerror("trailing junk after decimal integer");
+				}
+{hexfail}		{
+					yyerror("trailing junk after hexadecimal integer");
+				}
+{octfail}		{
+					yyerror("trailing junk after octal integer");
+				}
+{binfail}		{
+					yyerror("trailing junk after binary integer");
 				}
-{decimal}		{
+{numeric}		{
 					SET_YYLLOC();
 					yylval->str = pstrdup(yytext);
 					return FCONST;
 				}
-{decimalfail}	{
+{numericfail}	{
 					/* throw back the .., and treat as integer */
 					yyless(yyleng - 2);
 					SET_YYLLOC();
-					return process_integer_literal(yytext, yylval);
+					return process_integer_literal(yytext, yylval, 10);
 				}
 {real}			{
 					SET_YYLLOC();
@@ -996,17 +1049,17 @@ other			.
 {realfail1}		{
 					/*
 					 * throw back the [Ee], and figure out whether what
-					 * remains is an {integer} or {decimal}.
+					 * remains is a {decinteger} or {numeric}.
 					 */
 					yyless(yyleng - 1);
 					SET_YYLLOC();
-					return process_integer_literal(yytext, yylval);
+					return process_integer_literal(yytext, yylval, 10);
 				}
 {realfail2}		{
 					/* throw back the [Ee][+-], and proceed as above */
 					yyless(yyleng - 2);
 					SET_YYLLOC();
-					return process_integer_literal(yytext, yylval);
+					return process_integer_literal(yytext, yylval, 10);
 				}
 
 
@@ -1296,17 +1349,17 @@ litbufdup(core_yyscan_t yyscanner)
 }
 
 /*
- * Process {integer}.  Note this will also do the right thing with {decimal},
+ * Process {*integer}.  Note this will also do the right thing with {numeric},
  * ie digits and a decimal point.
  */
 static int
-process_integer_literal(const char *token, YYSTYPE *lval)
+process_integer_literal(const char *token, YYSTYPE *lval, int base)
 {
 	int			val;
 	char	   *endptr;
 
 	errno = 0;
-	val = strtoint(token, &endptr, 10);
+	val = strtoint(token, &endptr, base);
 	if (*endptr != '\0' || errno == ERANGE)
 	{
 		/* integer too large (or contains decimal pt), treat it as a float */
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c
index 2168080dcc..c3ed944a6c 100644
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -45,6 +45,17 @@ typedef struct
  * Formatting and conversion routines.
  *---------------------------------------------------------*/
 
+static const int8 hexlookup[128] = {
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
 /*
  * scanint8 --- try to parse a string into an int8.
  *
@@ -84,6 +95,48 @@ scanint8(const char *str, bool errorOK, int64 *result)
 		goto invalid_syntax;
 
 	/* process digits */
+	if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+	{
+		ptr += 2;
+		while (*ptr && isxdigit((unsigned char) *ptr))
+		{
+			int8		digit = hexlookup[(unsigned char) *ptr];
+
+			if (unlikely(pg_mul_s64_overflow(tmp, 16, &tmp)) ||
+				unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+				goto out_of_range;
+
+			ptr++;
+		}
+	}
+	else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+	{
+		ptr += 2;
+
+		while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+		{
+			int8		digit = (*ptr++ - '0');
+
+			if (unlikely(pg_mul_s64_overflow(tmp, 8, &tmp)) ||
+				unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+				goto out_of_range;
+		}
+	}
+	else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+	{
+		ptr += 2;
+
+		while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+		{
+			int8		digit = (*ptr++ - '0');
+
+			if (unlikely(pg_mul_s64_overflow(tmp, 2, &tmp)) ||
+				unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+				goto out_of_range;
+		}
+	}
+	else
+	{
 	while (*ptr && isdigit((unsigned char) *ptr))
 	{
 		int8		digit = (*ptr++ - '0');
@@ -92,6 +145,7 @@ scanint8(const char *str, bool errorOK, int64 *result)
 			unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
 			goto out_of_range;
 	}
+	}
 
 	/* allow trailing whitespace, but not other trailing chars */
 	while (*ptr != '\0' && isspace((unsigned char) *ptr))
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index b93096f288..7c6520346e 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -173,6 +173,17 @@ pg_atoi(const char *s, int size, int c)
 	return (int32) l;
 }
 
+static const int8 hexlookup[128] = {
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
 /*
  * Convert input string to a signed 16 bit integer.
  *
@@ -208,6 +219,48 @@ pg_strtoint16(const char *s)
 		goto invalid_syntax;
 
 	/* process digits */
+	if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+	{
+		ptr += 2;
+		while (*ptr && isxdigit((unsigned char) *ptr))
+		{
+			int8		digit = hexlookup[(unsigned char) *ptr];
+
+			if (unlikely(pg_mul_s16_overflow(tmp, 16, &tmp)) ||
+				unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+				goto out_of_range;
+
+			ptr++;
+		}
+	}
+	else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+	{
+		ptr += 2;
+
+		while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+		{
+			int8		digit = (*ptr++ - '0');
+
+			if (unlikely(pg_mul_s16_overflow(tmp, 8, &tmp)) ||
+				unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+				goto out_of_range;
+		}
+	}
+	else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+	{
+		ptr += 2;
+
+		while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+		{
+			int8		digit = (*ptr++ - '0');
+
+			if (unlikely(pg_mul_s16_overflow(tmp, 2, &tmp)) ||
+				unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+				goto out_of_range;
+		}
+	}
+	else
+	{
 	while (*ptr && isdigit((unsigned char) *ptr))
 	{
 		int8		digit = (*ptr++ - '0');
@@ -216,6 +269,7 @@ pg_strtoint16(const char *s)
 			unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
 			goto out_of_range;
 	}
+	}
 
 	/* allow trailing whitespace, but not other trailing chars */
 	while (*ptr != '\0' && isspace((unsigned char) *ptr))
@@ -284,6 +338,48 @@ pg_strtoint32(const char *s)
 		goto invalid_syntax;
 
 	/* process digits */
+	if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+	{
+		ptr += 2;
+		while (*ptr && isxdigit((unsigned char) *ptr))
+		{
+			int8		digit = hexlookup[(unsigned char) *ptr];
+
+			if (unlikely(pg_mul_s32_overflow(tmp, 16, &tmp)) ||
+				unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+				goto out_of_range;
+
+			ptr++;
+		}
+	}
+	else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+	{
+		ptr += 2;
+
+		while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+		{
+			int8		digit = (*ptr++ - '0');
+
+			if (unlikely(pg_mul_s32_overflow(tmp, 8, &tmp)) ||
+				unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+				goto out_of_range;
+		}
+	}
+	else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+	{
+		ptr += 2;
+
+		while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+		{
+			int8		digit = (*ptr++ - '0');
+
+			if (unlikely(pg_mul_s32_overflow(tmp, 2, &tmp)) ||
+				unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+				goto out_of_range;
+		}
+	}
+	else
+	{
 	while (*ptr && isdigit((unsigned char) *ptr))
 	{
 		int8		digit = (*ptr++ - '0');
@@ -292,6 +388,7 @@ pg_strtoint32(const char *s)
 			unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
 			goto out_of_range;
 	}
+	}
 
 	/* allow trailing whitespace, but not other trailing chars */
 	while (*ptr != '\0' && isspace((unsigned char) *ptr))
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index 0fab48a382..4436509d88 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -200,7 +200,7 @@ quotecontinuefail	{whitespace}*"-"?
 xbstart			[bB]{quote}
 xbinside		[^']*
 
-/* Hexadecimal number */
+/* Hexadecimal byte string */
 xhstart			[xX]{quote}
 xhinside		[^']*
 
@@ -279,7 +279,6 @@ xcstart			\/\*{op_chars}*
 xcstop			\*+\/
 xcinside		[^*/]+
 
-digit			[0-9]
 ident_start		[A-Za-z\200-\377_]
 ident_cont		[A-Za-z\200-\377_0-9\$]
 
@@ -318,24 +317,44 @@ self			[,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 op_chars		[\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 operator		{op_chars}+
 
-/* we no longer allow unary minus in numbers.
- * instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999
+/*
+ * Numbers
+ *
+ * Unary minus is not part of a number here.  Instead we pass it separately to
+ * the parser, where it gets coerced via doNegate().
  *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
  *
  * {realfail1} and {realfail2} are added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
+decdigit		[0-9]
+hexdigit		[0-9A-Fa-f]
+octdigit		[0-7]
+bindigit		[0-1]
+
+decinteger		{decdigit}+
+hexinteger		0[xX]{hexdigit}+
+octinteger		0[oO]{octdigit}+
+bininteger		0[bB]{bindigit}+
+
+hexfail0		0[xX]
+octfail0		0[oO]
+binfail0		0[bB]
 
-integer			{digit}+
-decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail		{digit}+\.\.
-real			({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1		({integer}|{decimal})[Ee]
-realfail2		({integer}|{decimal})[Ee][-+]
+decfail			{decinteger}{ident_start}
+hexfail			{hexinteger}{ident_start}
+octfail			{octinteger}{ident_start}
+binfail			{bininteger}{ident_start}
 
-param			\${integer}
+numeric			(({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail		{decdigit}+\.\.
+
+real			({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1		({decinteger}|{numeric})[Ee]
+realfail2		({decinteger}|{numeric})[Ee][-+]
+
+param			\${decinteger}
 
 /* psql-specific: characters allowed in variable names */
 variable_char	[A-Za-z\200-\377_0-9]
@@ -839,13 +858,43 @@ other			.
 					ECHO;
 				}
 
-{integer}		{
+{decinteger}	{
+					ECHO;
+				}
+{hexinteger}	{
+					ECHO;
+				}
+{octinteger}	{
+					ECHO;
+				}
+{bininteger}	{
+					ECHO;
+				}
+{hexfail0}		{
+					ECHO;
+				}
+{octfail0}		{
+					ECHO;
+				}
+{binfail0}		{
+					ECHO;
+				}
+{decfail}		{
+					ECHO;
+				}
+{hexfail}		{
+					ECHO;
+				}
+{octfail}		{
+					ECHO;
+				}
+{binfail}		{
 					ECHO;
 				}
-{decimal}		{
+{numeric}		{
 					ECHO;
 				}
-{decimalfail}	{
+{numericfail}	{
 					/* throw back the .., and treat as integer */
 					yyless(yyleng - 2);
 					ECHO;
diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l
index 7a0356638d..8d6e1cd76a 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -57,7 +57,7 @@ static bool		include_next;
 #define startlit()	(literalbuf[0] = '\0', literallen = 0)
 static void addlit(char *ytext, int yleng);
 static void addlitchar(unsigned char);
-static int	process_integer_literal(const char *token, YYSTYPE *lval);
+static int	process_integer_literal(const char *token, YYSTYPE *lval, int base);
 static void parse_include(void);
 static bool ecpg_isspace(char ch);
 static bool isdefine(void);
@@ -305,7 +305,6 @@ xcstart			\/\*{op_chars}*
 xcstop			\*+\/
 xcinside		[^*/]+
 
-digit			[0-9]
 ident_start		[A-Za-z\200-\377_]
 ident_cont		[A-Za-z\200-\377_0-9\$]
 
@@ -346,24 +345,44 @@ self			[,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 op_chars		[\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 operator		{op_chars}+
 
-/* we no longer allow unary minus in numbers.
- * instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999
+/*
+ * Numbers
+ *
+ * Unary minus is not part of a number here.  Instead we pass it separately to
+ * the parser, where it gets coerced via doNegate().
  *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
  *
  * {realfail1} and {realfail2} are added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
+decdigit		[0-9]
+hexdigit		[0-9A-Fa-f]
+octdigit		[0-7]
+bindigit		[0-1]
+
+decinteger		{decdigit}+
+hexinteger		0[xX]{hexdigit}+
+octinteger		0[oO]{octdigit}+
+bininteger		0[bB]{bindigit}+
+
+hexfail0		0[xX]
+octfail0		0[oO]
+binfail0		0[bB]
+
+decfail			{decinteger}{ident_start}
+hexfail			{hexinteger}{ident_start}
+octfail			{octinteger}{ident_start}
+binfail			{bininteger}{ident_start}
 
-integer			{digit}+
-decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail		{digit}+\.\.
-real			({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1		({integer}|{decimal})[Ee]
-realfail2		({integer}|{decimal})[Ee][-+]
+numeric			(({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail		{decdigit}+\.\.
 
-param			\${integer}
+real			({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1		({decinteger}|{numeric})[Ee]
+realfail2		({decinteger}|{numeric})[Ee][-+]
+
+param			\${decinteger}
 
 /* special characters for other dbms */
 /* we have to react differently in compat mode */
@@ -393,9 +412,6 @@ include_next	[iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT]
 import			[iI][mM][pP][oO][rR][tT]
 undef			[uU][nN][dD][eE][fF]
 
-/* C version of hex number */
-xch				0[xX][0-9A-Fa-f]*
-
 ccomment		"//".*\n
 
 if				[iI][fF]
@@ -408,7 +424,7 @@ endif			[eE][nN][dD][iI][fF]
 struct			[sS][tT][rR][uU][cC][tT]
 
 exec_sql		{exec}{space}*{sql}{space}*
-ipdigit			({digit}|{digit}{digit}|{digit}{digit}{digit})
+ipdigit			({decdigit}|{decdigit}{decdigit}|{decdigit}{decdigit}{decdigit})
 ip				{ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
 
 /* we might want to parse all cpp include files */
@@ -923,17 +939,20 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 }  /* <SQL> */
 
 <C,SQL>{
-{integer}		{
-					return process_integer_literal(yytext, &base_yylval);
+{decinteger}	{
+					return process_integer_literal(yytext, &base_yylval, 10);
 				}
-{decimal}		{
+{hexinteger}	{
+					return process_integer_literal(yytext + 2, &base_yylval, 16);
+				}
+{numeric}		{
 					base_yylval.str = mm_strdup(yytext);
 					return FCONST;
 				}
-{decimalfail}	{
+{numericfail}	{
 					/* throw back the .., and treat as integer */
 					yyless(yyleng - 2);
-					return process_integer_literal(yytext, &base_yylval);
+					return process_integer_literal(yytext, &base_yylval, 10);
 				}
 {real}			{
 					base_yylval.str = mm_strdup(yytext);
@@ -942,18 +961,25 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 {realfail1}		{
 					/*
 					 * throw back the [Ee], and figure out whether what
-					 * remains is an {integer} or {decimal}.
+					 * remains is a {decinteger} or {numeric}.
 					 */
 					yyless(yyleng - 1);
-					return process_integer_literal(yytext, &base_yylval);
+					return process_integer_literal(yytext, &base_yylval, 10);
 				}
 {realfail2}		{
 					/* throw back the [Ee][+-], and proceed as above */
 					yyless(yyleng - 2);
-					return process_integer_literal(yytext, &base_yylval);
+					return process_integer_literal(yytext, &base_yylval, 10);
 				}
 } /* <C,SQL> */
 
+<SQL>{octinteger}	{
+					return process_integer_literal(yytext + 2, &base_yylval, 8);
+				}
+<SQL>{bininteger}	{
+					return process_integer_literal(yytext + 2, &base_yylval, 2);
+				}
+
 <SQL>{
 :{identifier}((("->"|\.){identifier})|(\[{array}\]))*	{
 					base_yylval.str = mm_strdup(yytext+1);
@@ -1009,19 +1035,6 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 							return S_ANYTHING;
 					 }
 <C>{ccomment}		{ ECHO; }
-<C>{xch}			{
-						char* endptr;
-
-						errno = 0;
-						base_yylval.ival = strtoul((char *)yytext,&endptr,16);
-						if (*endptr != '\0' || errno == ERANGE)
-						{
-							errno = 0;
-							base_yylval.str = mm_strdup(yytext);
-							return SCONST;
-						}
-						return ICONST;
-					}
 <C>{cppinclude}		{
 						if (system_includes)
 						{
@@ -1546,17 +1559,17 @@ addlitchar(unsigned char ychar)
 }
 
 /*
- * Process {integer}.  Note this will also do the right thing with {decimal},
+ * Process {*integer}.  Note this will also do the right thing with {numeric},
  * ie digits and a decimal point.
  */
 static int
-process_integer_literal(const char *token, YYSTYPE *lval)
+process_integer_literal(const char *token, YYSTYPE *lval, int base)
 {
 	int			val;
 	char	   *endptr;
 
 	errno = 0;
-	val = strtoint(token, &endptr, 10);
+	val = strtoint(token, &endptr, base);
 	if (*endptr != '\0' || errno == ERANGE)
 	{
 		/* integer too large (or contains decimal pt), treat it as a float */
diff --git a/src/test/regress/expected/int2.out b/src/test/regress/expected/int2.out
index 55ea7202cd..220e1493e8 100644
--- a/src/test/regress/expected/int2.out
+++ b/src/test/regress/expected/int2.out
@@ -306,3 +306,22 @@ FROM (VALUES (-2.5::numeric),
   2.5 |          3
 (7 rows)
 
+-- non-decimal literals
+SELECT int2 '0b100101';
+ int2 
+------
+   37
+(1 row)
+
+SELECT int2 '0o273';
+ int2 
+------
+  187
+(1 row)
+
+SELECT int2 '0x42F';
+ int2 
+------
+ 1071
+(1 row)
+
diff --git a/src/test/regress/expected/int4.out b/src/test/regress/expected/int4.out
index 9d20b3380f..060b599705 100644
--- a/src/test/regress/expected/int4.out
+++ b/src/test/regress/expected/int4.out
@@ -437,3 +437,78 @@ SELECT lcm((-2147483648)::int4, 1::int4); -- overflow
 ERROR:  integer out of range
 SELECT lcm(2147483647::int4, 2147483646::int4); -- overflow
 ERROR:  integer out of range
+-- non-decimal literals
+SELECT int4 '0b100101';
+ int4 
+------
+   37
+(1 row)
+
+SELECT int4 '0o273';
+ int4 
+------
+  187
+(1 row)
+
+SELECT int4 '0x42F';
+ int4 
+------
+ 1071
+(1 row)
+
+-- lexer literals
+SELECT 0b100101;
+ ?column? 
+----------
+       37
+(1 row)
+
+SELECT 0o273;
+ ?column? 
+----------
+      187
+(1 row)
+
+SELECT 0x42F;
+ ?column? 
+----------
+     1071
+(1 row)
+
+-- error cases
+SELECT 0b;
+ERROR:  invalid binary integer at or near "SELECT 0b"
+LINE 1: SELECT 0b;
+        ^
+SELECT 1b;
+ERROR:  trailing junk after decimal integer at or near "SELECT 1b"
+LINE 1: SELECT 1b;
+        ^
+SELECT 0b0x;
+ERROR:  trailing junk after binary integer at or near "SELECT 0b0x"
+LINE 1: SELECT 0b0x;
+        ^
+SELECT 0o;
+ERROR:  invalid octal integer at or near "SELECT 0o"
+LINE 1: SELECT 0o;
+        ^
+SELECT 1o;
+ERROR:  trailing junk after decimal integer at or near "SELECT 1o"
+LINE 1: SELECT 1o;
+        ^
+SELECT 0o0x;
+ERROR:  trailing junk after octal integer at or near "SELECT 0o0x"
+LINE 1: SELECT 0o0x;
+        ^
+SELECT 0x;
+ERROR:  invalid hexadecimal integer at or near "SELECT 0x"
+LINE 1: SELECT 0x;
+        ^
+SELECT 1x;
+ERROR:  trailing junk after decimal integer at or near "SELECT 1x"
+LINE 1: SELECT 1x;
+        ^
+SELECT 0x0y;
+ERROR:  trailing junk after hexadecimal integer at or near "SELECT 0x0y"
+LINE 1: SELECT 0x0y;
+        ^
diff --git a/src/test/regress/expected/int8.out b/src/test/regress/expected/int8.out
index 36540ec456..edd15a4353 100644
--- a/src/test/regress/expected/int8.out
+++ b/src/test/regress/expected/int8.out
@@ -932,3 +932,22 @@ SELECT lcm((-9223372036854775808)::int8, 1::int8); -- overflow
 ERROR:  bigint out of range
 SELECT lcm(9223372036854775807::int8, 9223372036854775806::int8); -- overflow
 ERROR:  bigint out of range
+-- non-decimal literals
+SELECT int8 '0b100101';
+ int8 
+------
+   37
+(1 row)
+
+SELECT int8 '0o273';
+ int8 
+------
+  187
+(1 row)
+
+SELECT int8 '0x42F';
+ int8 
+------
+ 1071
+(1 row)
+
diff --git a/src/test/regress/sql/int2.sql b/src/test/regress/sql/int2.sql
index 613b344704..0dee22fe6d 100644
--- a/src/test/regress/sql/int2.sql
+++ b/src/test/regress/sql/int2.sql
@@ -112,3 +112,10 @@ CREATE TABLE INT2_TBL(f1 int2);
              (0.5::numeric),
              (1.5::numeric),
              (2.5::numeric)) t(x);
+
+
+-- non-decimal literals
+
+SELECT int2 '0b100101';
+SELECT int2 '0o273';
+SELECT int2 '0x42F';
diff --git a/src/test/regress/sql/int4.sql b/src/test/regress/sql/int4.sql
index 55ec07a147..d97d017fca 100644
--- a/src/test/regress/sql/int4.sql
+++ b/src/test/regress/sql/int4.sql
@@ -176,3 +176,29 @@ CREATE TABLE INT4_TBL(f1 int4);
 
 SELECT lcm((-2147483648)::int4, 1::int4); -- overflow
 SELECT lcm(2147483647::int4, 2147483646::int4); -- overflow
+
+
+-- non-decimal literals
+
+SELECT int4 '0b100101';
+SELECT int4 '0o273';
+SELECT int4 '0x42F';
+
+-- lexer literals
+
+SELECT 0b100101;
+SELECT 0o273;
+SELECT 0x42F;
+
+-- error cases
+SELECT 0b;
+SELECT 1b;
+SELECT 0b0x;
+
+SELECT 0o;
+SELECT 1o;
+SELECT 0o0x;
+
+SELECT 0x;
+SELECT 1x;
+SELECT 0x0y;
diff --git a/src/test/regress/sql/int8.sql b/src/test/regress/sql/int8.sql
index 32940b4daa..b7ad696dd8 100644
--- a/src/test/regress/sql/int8.sql
+++ b/src/test/regress/sql/int8.sql
@@ -250,3 +250,10 @@ CREATE TABLE INT8_TBL(q1 int8, q2 int8);
 
 SELECT lcm((-9223372036854775808)::int8, 1::int8); -- overflow
 SELECT lcm(9223372036854775807::int8, 9223372036854775806::int8); -- overflow
+
+
+-- non-decimal literals
+
+SELECT int8 '0b100101';
+SELECT int8 '0o273';
+SELECT int8 '0x42F';
-- 
2.33.1