From 559760e9fa5adf32d6ca6ed2236fa5f4bb0471ea Mon Sep 17 00:00:00 2001
From: Julien Rouhaud <julien.rouhaud@free.fr>
Date: Wed, 21 Apr 2021 23:54:02 +0800
Subject: [PATCH v1 2/4] Add a sqlol parser.

This is a toy example of an alternative grammar that only accepts a
LOLCODE-compatible version of a

SELECT [column, ] column FROM tablename

and falls back on the core parser for everything else.
---
 contrib/Makefile                |   1 +
 contrib/sqlol/.gitignore        |   7 +
 contrib/sqlol/Makefile          |  33 ++
 contrib/sqlol/sqlol.c           | 107 +++++++
 contrib/sqlol/sqlol_gram.y      | 440 ++++++++++++++++++++++++++
 contrib/sqlol/sqlol_gramparse.h |  61 ++++
 contrib/sqlol/sqlol_keywords.c  |  98 ++++++
 contrib/sqlol/sqlol_keywords.h  |  38 +++
 contrib/sqlol/sqlol_kwlist.h    |  21 ++
 contrib/sqlol/sqlol_scan.l      | 544 ++++++++++++++++++++++++++++++++
 contrib/sqlol/sqlol_scanner.h   | 118 +++++++
 11 files changed, 1468 insertions(+)
 create mode 100644 contrib/sqlol/.gitignore
 create mode 100644 contrib/sqlol/Makefile
 create mode 100644 contrib/sqlol/sqlol.c
 create mode 100644 contrib/sqlol/sqlol_gram.y
 create mode 100644 contrib/sqlol/sqlol_gramparse.h
 create mode 100644 contrib/sqlol/sqlol_keywords.c
 create mode 100644 contrib/sqlol/sqlol_keywords.h
 create mode 100644 contrib/sqlol/sqlol_kwlist.h
 create mode 100644 contrib/sqlol/sqlol_scan.l
 create mode 100644 contrib/sqlol/sqlol_scanner.h

diff --git a/contrib/Makefile b/contrib/Makefile
index f27e458482..2a80cd137b 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -43,6 +43,7 @@ SUBDIRS = \
 		postgres_fdw	\
 		seg		\
 		spi		\
+		sqlol		\
 		tablefunc	\
 		tcn		\
 		test_decoding	\
diff --git a/contrib/sqlol/.gitignore b/contrib/sqlol/.gitignore
new file mode 100644
index 0000000000..3c4b587792
--- /dev/null
+++ b/contrib/sqlol/.gitignore
@@ -0,0 +1,7 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
+sqlol_gram.c
+sqlol_gram.h
+sqlol_scan.c
diff --git a/contrib/sqlol/Makefile b/contrib/sqlol/Makefile
new file mode 100644
index 0000000000..025e77c4ff
--- /dev/null
+++ b/contrib/sqlol/Makefile
@@ -0,0 +1,33 @@
+# contrib/sqlol/Makefile
+
+MODULE_big = sqlol
+OBJS = \
+	$(WIN32RES) \
+	sqlol.o sqlol_gram.o sqlol_scan.o sqlol_keywords.o
+PGFILEDESC = "sqlol - Toy alternative grammar based on LOLCODE"
+
+sqlol_gram.h: sqlol_gram.c
+	touch $@
+
+sqlol_gram.c: BISONFLAGS += -d
+# sqlol_gram.c: BISON_CHECK_CMD = $(PERL) $(srcdir)/check_keywords.pl $< $(top_srcdir)/src/include/parser/kwlist.h
+
+
+sqlol_scan.c: FLEXFLAGS = -CF -p -p
+sqlol_scan.c: FLEX_NO_BACKUP=yes
+sqlol_scan.c: FLEX_FIX_WARNING=yes
+
+
+# All objects include sqlol_gram.h (via sqlol_gramparse.h); force that dependency:
+sqlol.o sqlol_gram.o sqlol_scan.o sqlol_keywords.o: sqlol_gram.h
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/sqlol
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/sqlol/sqlol.c b/contrib/sqlol/sqlol.c
new file mode 100644
index 0000000000..b986966181
--- /dev/null
+++ b/contrib/sqlol/sqlol.c
@@ -0,0 +1,107 @@
+/*-------------------------------------------------------------------------
+ *
+ * sqlol.c
+ *
+ *
+ * Copyright (c) 2008-2021, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  contrib/sqlol/sqlol.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "tcop/tcopprot.h"
+
+#include "sqlol_gramparse.h"
+#include "sqlol_keywords.h"
+
+PG_MODULE_MAGIC;
+
+
+/* Saved hook values in case of unload */
+static parser_hook_type prev_parser_hook = NULL;
+
+void		_PG_init(void);
+void		_PG_fini(void);
+
+static List *sqlol_parser_hook(const char *str, RawParseMode mode);
+
+
+/*
+ * Module load callback: put sqlol_parser_hook in front of any existing hook.
+ */
+void
+_PG_init(void)
+{
+	/* Save the current hook, which sqlol_parser_hook uses as its fallback. */
+	prev_parser_hook = parser_hook;
+	parser_hook = sqlol_parser_hook;
+}
+
+/*
+ * Module unload callback: restore the hook value saved by _PG_init().
+ */
+void
+_PG_fini(void)
+{
+	/* Uninstall hooks. */
+	parser_hook = prev_parser_hook;
+}
+
+/*
+ * sqlol_parser_hook
+ *		Try the sqlol grammar first, and let the next parser in the chain
+ *		handle everything that sqlol does not accept.
+ */
+static List *
+sqlol_parser_hook(const char *str, RawParseMode mode)
+{
+	sqlol_base_yy_extra_type extra;
+	sqlol_yyscan_t scanner;
+	bool		accepted = false;
+
+	/*
+	 * Only plain statement lists are attempted with the sqlol grammar; any
+	 * other parse mode goes straight to the fallback below.
+	 */
+	if (mode == RAW_PARSE_DEFAULT)
+	{
+		/* Set up the flex scanner first, then the bison parser state. */
+		scanner = sqlol_scanner_init(str, &extra.sqlol_yy_extra,
+									 sqlol_ScanKeywords,
+									 sqlol_NumScanKeywords);
+		sqlol_parser_init(&extra);
+
+		/* A zero result means that the whole string was valid sqlol. */
+		accepted = (sqlol_base_yyparse(scanner) == 0);
+
+		/* The scanner is no longer needed, release its memory. */
+		sqlol_scanner_finish(scanner);
+	}
+
+	/* On success, hand back the tree that the grammar stored for us. */
+	if (accepted)
+		return extra.parsetree;
+
+	/*
+	 * Unsupported mode or invalid statement.  Fall back on the previous
+	 * parser_hook if there was one when we got loaded, and on the core
+	 * raw_parser() otherwise.
+	 */
+	if (prev_parser_hook != NULL)
+		return (*prev_parser_hook) (str, mode);
+
+	return raw_parser(str, mode);
+}
+
+int
+sqlol_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, sqlol_yyscan_t yyscanner)
+{
+	/*
+	 * Thin filter between bison and flex: no token lookahead or rewriting is
+	 * done, the scanner's token is handed to the grammar unchanged.
+	 */
+	return sqlol_yylex(&lvalp->sqlol_yystype, llocp, yyscanner);
+}
diff --git a/contrib/sqlol/sqlol_gram.y b/contrib/sqlol/sqlol_gram.y
new file mode 100644
index 0000000000..64d00d14ca
--- /dev/null
+++ b/contrib/sqlol/sqlol_gram.y
@@ -0,0 +1,440 @@
+%{
+
+/*#define YYDEBUG 1*/
+/*-------------------------------------------------------------------------
+ *
+ * sqlol_gram.y
+ *	  sqlol BISON rules/actions
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  contrib/sqlol/sqlol_gram.y
+ *
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/namespace.h"
+#include "nodes/makefuncs.h"
+
+#include "sqlol_gramparse.h"
+
+/*
+ * Location tracking support --- simpler than bison's default, since we only
+ * want to track the start position not the end position of each nonterminal.
+ */
+#define YYLLOC_DEFAULT(Current, Rhs, N) \
+	do { \
+		if ((N) > 0) \
+			(Current) = (Rhs)[1]; \
+		else \
+			(Current) = (-1); \
+	} while (0)
+
+/*
+ * The above macro assigns -1 (unknown) as the parse location of any
+ * nonterminal that was reduced from an empty rule, or whose leftmost
+ * component was reduced from an empty rule.  This is problematic
+ * for nonterminals defined like
+ *		OptFooList: / * EMPTY * / { ... } | OptFooList Foo { ... } ;
+ * because we'll set -1 as the location during the first reduction and then
+ * copy it during each subsequent reduction, leaving us with -1 for the
+ * location even when the list is not empty.  To fix that, do this in the
+ * action for the nonempty rule(s):
+ *		if (@$ < 0) @$ = @2;
+ * (Although we have many nonterminals that follow this pattern, we only
+ * bother with fixing @$ like this when the nonterminal's parse location
+ * is actually referenced in some rule.)
+ *
+ * A cleaner answer would be to make YYLLOC_DEFAULT scan all the Rhs
+ * locations until it's found one that's not -1.  Then we'd get a correct
+ * location for any nonterminal that isn't entirely empty.  But this way
+ * would add overhead to every rule reduction, and so far there's not been
+ * a compelling reason to pay that overhead.
+ */
+
+/*
+ * Bison doesn't allocate anything that needs to live across parser calls,
+ * so we can easily have it use palloc instead of malloc.  This prevents
+ * memory leaks if we error out during parsing.  Note this only works with
+ * bison >= 2.0.  However, in bison 1.875 the default is to use alloca()
+ * if possible, so there's not really much problem anyhow, at least if
+ * you're building with gcc.
+ */
+#define YYMALLOC palloc
+#define YYFREE   pfree
+
+
+#define parser_yyerror(msg)  sqlol_scanner_yyerror(msg, yyscanner)
+#define parser_errposition(pos)  sqlol_scanner_errposition(pos, yyscanner)
+
+static void sqlol_base_yyerror(YYLTYPE *yylloc, sqlol_yyscan_t yyscanner,
+						 const char *msg);
+static RawStmt *makeRawStmt(Node *stmt, int stmt_location);
+static void updateRawStmtEnd(RawStmt *rs, int end_location);
+static Node *makeColumnRef(char *colname, List *indirection,
+						   int location, sqlol_yyscan_t yyscanner);
+static void check_qualified_name(List *names, sqlol_yyscan_t yyscanner);
+static List *check_indirection(List *indirection, sqlol_yyscan_t yyscanner);
+
+%}
+
+%pure-parser
+%expect 0
+%name-prefix="sqlol_base_yy"
+%locations
+
+%parse-param {sqlol_yyscan_t yyscanner}
+%lex-param   {sqlol_yyscan_t yyscanner}
+
+%union
+{
+	sqlol_YYSTYPE		sqlol_yystype;
+	/* these fields must match sqlol_YYSTYPE: */
+	int					ival;
+	char				*str;
+	const char			*keyword;
+
+	List				*list;
+	Node				*node;
+	Value				*value;
+	RangeVar			*range;
+	ResTarget			*target;
+}
+
+%type <node>	stmt toplevel_stmt GimmehStmt simple_gimmeh columnref
+				indirection_el
+
+%type <list>	parse_toplevel stmtmulti gimmeh_list indirection
+
+%type <range>	qualified_name
+
+%type <str>		ColId ColLabel attr_name
+
+%type <target>	gimmeh_el
+
+/*
+ * Non-keyword token types.  These are hard-wired into the "flex" lexer.
+ * They must be listed first so that their numeric codes do not depend on
+ * the set of keywords.  PL/pgSQL depends on this so that it can share the
+ * same lexer.  If you add/change tokens here, fix PL/pgSQL to match!
+ *
+ */
+%token <str>	IDENT FCONST SCONST Op
+
+/*
+ * If you want to make any keyword changes, update the keyword table in
+ * contrib/sqlol/sqlol_kwlist.h and add the new keywords to the %token list
+ * below.  Unlike the core grammar, this file has no per-category keyword
+ * lists to maintain.
+ */
+
+/* ordinary key words in alphabetical order */
+%token <keyword> A GIMMEH HAI HAS I KTHXBYE
+
+
+%%
+
+/*
+ *	The target production for the whole parse.
+ *
+ * We only ever parse a list of statements here: the special MODE_XXX
+ * symbols of the core grammar are not supported, and the parser hook only
+ * invokes this grammar for RAW_PARSE_DEFAULT.
+ */
+parse_toplevel:
+			stmtmulti
+			{
+				pg_yyget_extra(yyscanner)->parsetree = $1;
+			}
+		;
+
+/*
+ * At top level, we wrap each stmt with a RawStmt node carrying start location
+ * and length of the stmt's text.  The start loc/len are driven entirely from
+ * the locations of the KTHXBYE separators (@2): using @1 or @3 doesn't work
+ * for an empty stmt since, as noted in the comments for YYLLOC_DEFAULT, we'd
+ * get -1 for the location in such cases.  Unlike the core grammar's one-byte
+ * ';', the separator is a multi-byte keyword, so the following stmt starts at
+ * @2 plus the keyword's length rather than at @2 + 1.
+ * We also take care to discard empty statements entirely.
+ */
+stmtmulti:	stmtmulti KTHXBYE toplevel_stmt
+				{
+					if ($1 != NIL)
+					{
+						/* update length of previous stmt */
+						updateRawStmtEnd(llast_node(RawStmt, $1), @2);
+					}
+					if ($3 != NULL)
+						$$ = lappend($1, makeRawStmt($3, @2 + (int) strlen("kthxbye")));
+					else
+						$$ = $1;
+				}
+			| toplevel_stmt
+				{
+					if ($1 != NULL)
+						$$ = list_make1(makeRawStmt($1, 0));
+					else
+						$$ = NIL;
+				}
+		;
+
+/*
+ * toplevel_stmt is currently just an alias for stmt: unlike in the core
+ * grammar, no statement here is restricted to the top level.
+ */
+toplevel_stmt:
+			stmt
+		;
+
+stmt:
+			GimmehStmt
+			| /*EMPTY*/
+				{ $$ = NULL; }
+		;
+
+/*****************************************************************************
+ *
+ * GIMMEH statement
+ *
+ *****************************************************************************/
+
+GimmehStmt:
+			simple_gimmeh						{ $$ = $1; }
+		;
+
+simple_gimmeh:
+			HAI FCONST I HAS A qualified_name
+			GIMMEH gimmeh_list
+				{
+					SelectStmt *n = makeNode(SelectStmt);	/* FCONST ($2) is unused */
+					n->targetList = $8;		/* the gimmeh_list */
+					n->fromClause = list_make1($6);	/* the qualified_name */
+					$$ = (Node *)n;
+				}
+		;
+
+gimmeh_list:
+		   gimmeh_el							{ $$ = list_make1($1); }
+		   | gimmeh_list ',' gimmeh_el			{ $$ = lappend($1, $3); }
+
+gimmeh_el:
+		 columnref
+			{
+				$$ = makeNode(ResTarget);
+				$$->name = NULL;
+				$$->indirection = NIL;
+				$$->val = (Node *)$1;
+				$$->location = @1;
+			}
+
+qualified_name:
+			ColId
+				{
+					$$ = makeRangeVar(NULL, $1, @1);
+				}
+			| ColId indirection
+				{
+					check_qualified_name($2, yyscanner);
+					$$ = makeRangeVar(NULL, NULL, @1);	/* names are set below */
+					switch (list_length($2))
+					{
+						case 1:	/* schema.relation */
+							$$->catalogname = NULL;
+							$$->schemaname = $1;
+							$$->relname = strVal(linitial($2));
+							break;
+						case 2:	/* catalog.schema.relation */
+							$$->catalogname = $1;
+							$$->schemaname = strVal(linitial($2));
+							$$->relname = strVal(lsecond($2));
+							break;
+						default:
+							/*
+							 * It's ok to error out here as at this point we
+							 * already parsed a "HAI FCONST" preamble, and no
+							 * other grammar is likely to accept a command
+							 * starting with that, so there's no point trying
+							 * to fall back on the other grammars.
+							 */
+							ereport(ERROR,
+									(errcode(ERRCODE_SYNTAX_ERROR),
+									 errmsg("improper qualified name (too many dotted names): %s",
+											NameListToString(lcons(makeString($1), $2))),
+									 parser_errposition(@1)));
+							break;
+					}
+				}
+		;
+
+columnref:	ColId
+				{
+					$$ = makeColumnRef($1, NIL, @1, yyscanner);
+				}
+			| ColId indirection
+				{
+					$$ = makeColumnRef($1, $2, @1, yyscanner);
+				}
+		;
+
+ColId:		IDENT									{ $$ = $1; }
+
+indirection:
+			indirection_el							{ $$ = list_make1($1); }
+			| indirection indirection_el			{ $$ = lappend($1, $2); }
+		;
+
+indirection_el:
+			'.' attr_name
+				{
+					$$ = (Node *) makeString($2);
+				}
+		;
+
+attr_name:	ColLabel								{ $$ = $1; };
+
+ColLabel:	IDENT									{ $$ = $1; }
+
+%%
+
+/*
+ * The signature of this function is required by bison.  However, we
+ * ignore the passed yylloc and instead use the last token position
+ * available from the scanner.
+ */
+static void
+sqlol_base_yyerror(YYLTYPE *yylloc, sqlol_yyscan_t yyscanner, const char *msg)
+{
+	parser_yyerror(msg);
+}
+
+static RawStmt *
+makeRawStmt(Node *stmt, int stmt_location)
+{
+	RawStmt    *wrapper = makeNode(RawStmt);
+	/* zero length for now; updateRawStmtEnd() may set the real one later */
+	wrapper->stmt_len = 0;
+	wrapper->stmt_location = stmt_location;
+	wrapper->stmt = stmt;
+	return wrapper;
+}
+
+/* Record where a RawStmt ends, i.e. that it doesn't run to end of string */
+static void
+updateRawStmtEnd(RawStmt *rs, int end_location)
+{
+	/*
+	 * Only the first separator that follows a statement determines its
+	 * length.  With consecutive separators (empty statements in between),
+	 * the same RawStmt is still the last one in the list and gets passed
+	 * here again, so a length that was already set has to be left alone.
+	 * Only a length that is still zero (the value makeRawStmt starts with)
+	 * or negative gets computed from the given end location.
+	 */
+	if (rs->stmt_len <= 0)
+		rs->stmt_len = end_location - rs->stmt_location;
+}
+
+static Node *
+makeColumnRef(char *colname, List *indirection,
+			  int location, sqlol_yyscan_t yyscanner)
+{
+	/*
+	 * Generate a ColumnRef node, wrapped in an A_Indirection node if the list
+	 * has any subscript (A_Indices); field selections before the first
+	 * subscript go into the ColumnRef's "fields".  As indirection_el only
+	 * builds String nodes here, this grammar never reaches the A_Indices/A_Star code.
+	 */
+	ColumnRef  *c = makeNode(ColumnRef);
+	int		nfields = 0;	/* list elements seen before the current one */
+	ListCell *l;
+
+	c->location = location;
+	foreach(l, indirection)
+	{
+		if (IsA(lfirst(l), A_Indices))
+		{
+			A_Indirection *i = makeNode(A_Indirection);
+
+			if (nfields == 0)
+			{
+				/* easy case - all indirection goes to A_Indirection */
+				c->fields = list_make1(makeString(colname));
+				i->indirection = check_indirection(indirection, yyscanner);
+			}
+			else
+			{
+				/* got to split the list in two */
+				i->indirection = check_indirection(list_copy_tail(indirection,
+																  nfields),
+												   yyscanner);
+				indirection = list_truncate(indirection, nfields);
+				c->fields = lcons(makeString(colname), indirection);
+			}
+			i->arg = (Node *) c;
+			return (Node *) i;
+		}
+		else if (IsA(lfirst(l), A_Star))
+		{
+			/* We only allow '*' at the end of a ColumnRef */
+			if (lnext(indirection, l) != NULL)
+				parser_yyerror("improper use of \"*\"");
+		}
+		nfields++;
+	}
+	/* No subscripting, so all indirection gets added to field list */
+	c->fields = lcons(makeString(colname), indirection);
+	return (Node *) c;
+}
+
+/* check_qualified_name --- check the result of qualified_name production
+ *
+ * Reports a syntax error for any name element that is not a String node.
+ * (indirection_el currently only builds String nodes via makeString().)
+ */
+static void
+check_qualified_name(List *names, sqlol_yyscan_t yyscanner)
+{
+	ListCell   *i;
+
+	foreach(i, names)
+	{
+		if (!IsA(lfirst(i), String))
+			parser_yyerror("syntax error");
+	}
+}
+
+/* check_indirection --- check the result of indirection production
+ *
+ * Complains about an A_Star node anywhere but at the end of the list, and
+ * returns the list unchanged.  (The grammar in this file has no '*' rule.)
+ */
+static List *
+check_indirection(List *indirection, sqlol_yyscan_t yyscanner)
+{
+	ListCell *l;
+
+	foreach(l, indirection)
+	{
+		if (IsA(lfirst(l), A_Star))
+		{
+			if (lnext(indirection, l) != NULL)
+				parser_yyerror("improper use of \"*\"");
+		}
+	}
+	return indirection;
+}
+
+/* sqlol_parser_init()
+ * Initialize to parse one query string
+ */
+void
+sqlol_parser_init(sqlol_base_yy_extra_type *yyext)
+{
+	yyext->parsetree = NIL;		/* in case grammar forgets to set it */
+}
diff --git a/contrib/sqlol/sqlol_gramparse.h b/contrib/sqlol/sqlol_gramparse.h
new file mode 100644
index 0000000000..58233a8d87
--- /dev/null
+++ b/contrib/sqlol/sqlol_gramparse.h
@@ -0,0 +1,61 @@
+/*-------------------------------------------------------------------------
+ *
+ * sqlol_gramparse.h
+ *		Shared definitions for the "raw" parser (flex and bison phases only)
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * contrib/sqlol/sqlol_gramparse.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef SQLOL_GRAMPARSE_H
+#define SQLOL_GRAMPARSE_H
+
+#include "nodes/parsenodes.h"
+#include "sqlol_scanner.h"
+
+/*
+ * NB: include gram.h only AFTER including scanner.h, because scanner.h
+ * is what #defines YYLTYPE.
+ */
+#include "sqlol_gram.h"
+
+/*
+ * The YY_EXTRA data that a flex scanner allows us to pass around.  Private
+ * state needed for raw parsing/lexing goes here.
+ */
+typedef struct sqlol_base_yy_extra_type
+{
+	/*
+	 * Fields used by the sqlol scanner.  Keep this member first: the scanner
+	 * is handed its address (see sqlol_parser_hook) and pg_yyget_extra()
+	 * treats the scanner's extra pointer as pointing to this whole struct.
+	 */
+	sqlol_yy_extra_type sqlol_yy_extra;
+
+	/* State variables that belong to the grammar. */
+	List	   *parsetree;		/* final parse result is delivered here */
+} sqlol_base_yy_extra_type;
+
+/*
+ * In principle we should use yyget_extra() to fetch the yyextra field
+ * from a yyscanner struct.  However, flex always puts that field first,
+ * and this is sufficiently performance-critical to make it seem worth
+ * cheating a bit to use an inline macro.
+ */
+#define pg_yyget_extra(yyscanner) (*((sqlol_base_yy_extra_type **) (yyscanner)))
+
+
+/* from sqlol.c */
+extern int	sqlol_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
+					   sqlol_yyscan_t yyscanner);
+
+/* from sqlol_gram.y (parser entry point is named by %name-prefix) */
+extern void sqlol_parser_init(sqlol_base_yy_extra_type *yyext);
+extern int	sqlol_base_yyparse(sqlol_yyscan_t yyscanner);
+
+#endif							/* SQLOL_GRAMPARSE_H */
diff --git a/contrib/sqlol/sqlol_keywords.c b/contrib/sqlol/sqlol_keywords.c
new file mode 100644
index 0000000000..dbbdf5493c
--- /dev/null
+++ b/contrib/sqlol/sqlol_keywords.c
@@ -0,0 +1,98 @@
+/*-------------------------------------------------------------------------
+ *
+ * sqlol_keywords.c
+ *	  lexical token lookup for key words in PostgreSQL
+ *
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  contrib/sqlol/sqlol_keywords.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "sqlol_gramparse.h"
+
+#define PG_KEYWORD(a,b,c) {a,b,c},
+
+const sqlol_ScanKeyword sqlol_ScanKeywords[] = {
+#include "sqlol_kwlist.h"
+};
+
+const int	sqlol_NumScanKeywords = lengthof(sqlol_ScanKeywords);
+
+#undef PG_KEYWORD
+
+
+/*
+ * ScanKeywordLookup - see if a given word is a keyword
+ *
+ * The table to be searched is passed explicitly, so that this can be used
+ * to search keyword lists other than the standard list appearing above.
+ *
+ * Returns a pointer to the sqlol_ScanKeyword table entry, or NULL if no match.
+ *
+ * The match is done case-insensitively.  Note that we deliberately use a
+ * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
+ * even if we are in a locale where tolower() would produce more or different
+ * translations.  This is to conform to the SQL99 spec, which says that
+ * keywords are to be matched in this way even though non-keyword identifiers
+ * receive a different case-normalization mapping.
+ */
+const sqlol_ScanKeyword *
+sqlol_ScanKeywordLookup(const char *text,
+				  const sqlol_ScanKeyword *keywords,
+				  int num_keywords)
+{
+	char		folded[NAMEDATALEN];
+	int			textlen;
+	int			pos;
+	int			lo = 0;
+	int			hi = num_keywords - 1;
+
+	/*
+	 * A word that doesn't fit in the folding buffer can't be a keyword, as
+	 * all keywords are assumed to be shorter than NAMEDATALEN.
+	 */
+	textlen = strlen(text);
+	if (textlen >= NAMEDATALEN)
+		return NULL;
+
+	/*
+	 * Fold 'A'-'Z' to lower case by hand, copying the terminating NUL along
+	 * with the rest of the word.  tolower() must not be used here since it
+	 * may produce the wrong translation in some locales (eg, Turkish).
+	 */
+	for (pos = 0; pos <= textlen; pos++)
+	{
+		char		c = text[pos];
+
+		folded[pos] = (c >= 'A' && c <= 'Z') ? c + ('a' - 'A') : c;
+	}
+
+	/*
+	 * Binary search with plain strcmp() comparisons; lo and hi are the
+	 * bounds (both inclusive) of the table slice still worth looking at.
+	 */
+	while (lo <= hi)
+	{
+		int			mid = lo + (hi - lo) / 2;
+		int			cmp = strcmp(keywords[mid].name, folded);
+
+		if (cmp == 0)
+			return &keywords[mid];
+		/* not found yet: keep only the half that can still hold the word */
+		if (cmp < 0)
+			lo = mid + 1;
+		else
+			hi = mid - 1;
+	}
+
+	/* no table entry matches */
+	return NULL;
+}
+
diff --git a/contrib/sqlol/sqlol_keywords.h b/contrib/sqlol/sqlol_keywords.h
new file mode 100644
index 0000000000..bc4acf4541
--- /dev/null
+++ b/contrib/sqlol/sqlol_keywords.h
@@ -0,0 +1,38 @@
+/*-------------------------------------------------------------------------
+ *
+ * sqlol_keywords.h
+ *	  lexical token lookup for key words in PostgreSQL
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * contrib/sqlol/sqlol_keywords.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef SQLOL_KEYWORDS_H
+#define SQLOL_KEYWORDS_H
+
+/* Keyword categories --- should match lists in gram.y */
+#define UNRESERVED_KEYWORD		0
+#define COL_NAME_KEYWORD		1
+#define TYPE_FUNC_NAME_KEYWORD	2
+#define RESERVED_KEYWORD		3
+
+
+typedef struct sqlol_ScanKeyword
+{
+	const char *name;			/* in lower case */
+	int16		value;			/* grammar's token code */
+	int16		category;		/* see codes above */
+} sqlol_ScanKeyword;
+
+extern PGDLLIMPORT const sqlol_ScanKeyword sqlol_ScanKeywords[];
+extern PGDLLIMPORT const int sqlol_NumScanKeywords;
+
+extern const sqlol_ScanKeyword *sqlol_ScanKeywordLookup(const char *text,
+				  const sqlol_ScanKeyword *keywords,
+				  int num_keywords);
+
+#endif							/* SQLOL_KEYWORDS_H */
diff --git a/contrib/sqlol/sqlol_kwlist.h b/contrib/sqlol/sqlol_kwlist.h
new file mode 100644
index 0000000000..2de3893ee4
--- /dev/null
+++ b/contrib/sqlol/sqlol_kwlist.h
@@ -0,0 +1,21 @@
+/*-------------------------------------------------------------------------
+ *
+ * sqlol_kwlist.h
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  contrib/sqlol/sqlol_kwlist.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* name, value, category */
+PG_KEYWORD("a", A, UNRESERVED_KEYWORD)
+PG_KEYWORD("gimmeh", GIMMEH, UNRESERVED_KEYWORD)
+PG_KEYWORD("hai", HAI, RESERVED_KEYWORD)
+PG_KEYWORD("has", HAS, UNRESERVED_KEYWORD)
+PG_KEYWORD("i", I, UNRESERVED_KEYWORD)
+PG_KEYWORD("kthxbye", KTHXBYE, UNRESERVED_KEYWORD)
diff --git a/contrib/sqlol/sqlol_scan.l b/contrib/sqlol/sqlol_scan.l
new file mode 100644
index 0000000000..a7088b8390
--- /dev/null
+++ b/contrib/sqlol/sqlol_scan.l
@@ -0,0 +1,544 @@
+%top{
+/*-------------------------------------------------------------------------
+ *
+ * sqlol_scan.l
+ *	  lexical scanner for sqlol
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  contrib/sqlol/sqlol_scan.l
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "common/string.h"
+#include "sqlol_gramparse.h"
+#include "parser/scansup.h"
+#include "mb/pg_wchar.h"
+
+#include "sqlol_keywords.h"
+}
+
+%{
+
+/* LCOV_EXCL_START */
+
+/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
+#undef fprintf
+#define fprintf(file, fmt, msg)  fprintf_to_ereport(fmt, msg)
+
+static void
+fprintf_to_ereport(const char *fmt, const char *msg)
+{
+	ereport(ERROR, (errmsg_internal("%s", msg)));	/* fmt is not used */
+}
+
+
+/*
+ * Set the type of YYSTYPE.
+ */
+#define YYSTYPE sqlol_YYSTYPE
+
+/*
+ * Set the type of yyextra.  All state variables used by the scanner should
+ * be in yyextra, *not* statically allocated.
+ */
+#define YY_EXTRA_TYPE sqlol_yy_extra_type *
+
+/*
+ * Each call to yylex must set yylloc to the location of the found token
+ * (expressed as a byte offset from the start of the input text).
+ * When we parse a token that requires multiple lexer rules to process,
+ * this should be done in the first such rule, else yylloc will point
+ * into the middle of the token.
+ */
+#define SET_YYLLOC()  (*(yylloc) = yytext - yyextra->scanbuf)
+
+/*
+ * Advance yylloc by the given number of bytes.
+ */
+#define ADVANCE_YYLLOC(delta)  ( *(yylloc) += (delta) )
+
+/*
+ * Sometimes, we do want yylloc to point into the middle of a token; this is
+ * useful for instance to throw an error about an escape sequence within a
+ * string literal.  But if we find no error there, we want to revert yylloc
+ * to the token start, so that that's the location reported to the parser.
+ * Use PUSH_YYLLOC/POP_YYLLOC to save/restore yylloc around such code.
+ * (Currently the implied "stack" is just one location, but someday we might
+ * need to nest these.)
+ */
+#define PUSH_YYLLOC()	(yyextra->save_yylloc = *(yylloc))
+#define POP_YYLLOC()	(*(yylloc) = yyextra->save_yylloc)
+
+#define startlit()	( yyextra->literallen = 0 )
+static void addlit(char *ytext, int yleng, sqlol_yyscan_t yyscanner);
+static void addlitchar(unsigned char ychar, sqlol_yyscan_t yyscanner);
+static char *litbufdup(sqlol_yyscan_t yyscanner);
+
+#define yyerror(msg)  sqlol_scanner_yyerror(msg, yyscanner)
+
+#define lexer_errposition()  sqlol_scanner_errposition(*(yylloc), yyscanner)
+
+/*
+ * Work around a bug in flex 2.5.35: it emits a couple of functions that
+ * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
+ * this would cause warnings.  Providing our own declarations should be
+ * harmless even when the bug gets fixed.
+ */
+extern int	sqlol_yyget_column(yyscan_t yyscanner);
+extern void sqlol_yyset_column(int column_no, yyscan_t yyscanner);
+
+%}
+
+%option reentrant
+%option bison-bridge
+%option bison-locations
+%option 8bit
+%option never-interactive
+%option nodefault
+%option noinput
+%option nounput
+%option noyywrap
+%option noyyalloc
+%option noyyrealloc
+%option noyyfree
+%option warn
+%option prefix="sqlol_yy"
+
+/*
+ * OK, here is a short description of lex/flex rules behavior.
+ * The longest pattern which matches an input string is always chosen.
+ * For equal-length patterns, the first occurring in the rules list is chosen.
+ * INITIAL is the starting state, to which all non-conditional rules apply.
+ * Exclusive states change parsing rules while the state is active.  When in
+ * an exclusive state, only those rules defined for that state apply.
+ *
+ * We use exclusive states for quoted strings, extended comments,
+ * and to eliminate parsing troubles for numeric strings.
+ * Exclusive states:
+ *  <xd> delimited identifiers (double-quoted identifiers)
+ *  <xq> standard quoted strings
+ *  <xqs> quote stop (detect continued strings)
+ *
+ * Remember to add an <<EOF>> case whenever you add a new exclusive state!
+ * The default one is probably not the right thing.
+ */
+
+%x xd
+%x xq
+%x xqs
+
+/*
+ * In order to make the world safe for Windows and Mac clients as well as
+ * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
+ * sequence will be seen as two successive newlines, but that doesn't cause
+ * any problems.  Comments that start with -- and extend to the next
+ * newline are treated as equivalent to a single whitespace character.
+ *
+ * NOTE a fine point: if there is no newline following --, we will absorb
+ * everything to the end of the input as a comment.  This is correct.  Older
+ * versions of Postgres failed to recognize -- as a comment if the input
+ * did not end with a newline.
+ *
+ * XXX perhaps \f (formfeed) should be treated as a newline as well?
+ *
+ * XXX if you change the set of whitespace characters, fix scanner_isspace()
+ * to agree.
+ */
+
+space			[ \t\n\r\f]
+horiz_space		[ \t\f]
+newline			[\n\r]
+non_newline		[^\n\r]
+
+comment			("--"{non_newline}*)
+
+whitespace		({space}+|{comment})
+
+/*
+ * SQL requires at least one newline in the whitespace separating
+ * string literals that are to be concatenated.  Silly, but who are we
+ * to argue?  Note that {whitespace_with_newline} should not have * after
+ * it, whereas {whitespace} should generally have a * after it...
+ */
+
+special_whitespace		({space}+|{comment}{newline})
+horiz_whitespace		({horiz_space}|{comment})
+whitespace_with_newline	({horiz_whitespace}*{newline}{special_whitespace}*)
+
+quote			'
+/* If we see {quote} then {quotecontinue}, the quoted string continues */
+quotecontinue	{whitespace_with_newline}{quote}
+
+/*
+ * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
+ * {quotecontinue}.  It might seem that this could just be {whitespace}*,
+ * but if there's a dash after {whitespace_with_newline}, it must be consumed
+ * to see if there's another dash --- which would start a {comment} and thus
+ * allow continuation of the {quotecontinue} token.
+ */
+quotecontinuefail	{whitespace}*"-"?
+
+/* Extended quote
+ * xqdouble implements embedded quote, ''''
+ */
+xqstart			{quote}
+xqdouble		{quote}{quote}
+xqinside		[^']+
+
+/* Double quote
+ * Allows embedded spaces and other special characters into identifiers.
+ */
+dquote			\"
+xdstart			{dquote}
+xdstop			{dquote}
+xddouble		{dquote}{dquote}
+xdinside		[^"]+
+
+digit			[0-9]
+ident_start		[A-Za-z\200-\377_]
+ident_cont		[A-Za-z\200-\377_0-9\$]
+
+identifier		{ident_start}{ident_cont}*
+
+decimal			(({digit}+)|({digit}*\.{digit}+)|({digit}+\.{digit}*))
+
+other			.
+
+%%
+
+{whitespace}	{
+					/* ignore */
+				}
+
+
+{xqstart}		{
+					yyextra->saw_non_ascii = false;
+					SET_YYLLOC();
+					BEGIN(xq);
+					startlit();
+}
+<xq>{quote} {
+					/*
+					 * When we are scanning a quoted string and see an end
+					 * quote, we must look ahead for a possible continuation.
+					 * If we don't see one, we know the end quote was in fact
+					 * the end of the string.  To reduce the lexer table size,
+					 * a single "xqs" state does the lookahead; the state we
+					 * came from is saved so xqs can resume it or finish.
+					 */
+					yyextra->state_before_str_stop = YYSTATE;
+					BEGIN(xqs);
+				}
+<xqs>{quotecontinue} {
+					/*
+					 * Found a quote continuation, so return to the in-quote
+					 * state and continue scanning the literal.  Nothing is
+					 * added to the literal's contents.
+					 */
+					BEGIN(yyextra->state_before_str_stop);	/* saved by <xq>{quote} */
+				}
+<xqs>{quotecontinuefail} |
+<xqs>{other} |
+<xqs><<EOF>>	{
+					/*
+					 * Failed to see a quote continuation.  Throw back
+					 * everything after the end quote, and handle the string
+					 * according to the state we were in previously.
+					 */
+					yyless(0);		/* the lookahead text gets rescanned */
+					BEGIN(INITIAL);
+
+					switch (yyextra->state_before_str_stop)
+					{
+						case xq:
+							/*
+							 * Check that the data remains valid if saw_non_ascii
+							 * is set.  NOTE(review): no rule visible here sets it.
+							 */
+							if (yyextra->saw_non_ascii)
+								pg_verifymbstr(yyextra->literalbuf,
+										yyextra->literallen,
+										false);
+							yylval->str = litbufdup(yyscanner);
+							return SCONST;
+						default:
+							yyerror("unhandled previous state in xqs");	/* no return follows */
+					}
+				}
+
+<xq>{xqdouble} {
+					addlitchar('\'', yyscanner);	/* '' adds a single quote */
+				}
+<xq>{xqinside}  {
+					addlit(yytext, yyleng, yyscanner);	/* run of non-quote chars */
+				}
+<xq><<EOF>>		{	/* yyerror() may return, and falling out of an <<EOF>> action loops */
+					yyerror("unterminated quoted string"); return '\''; /* stray quote: presumably a syntax error */ }
+
+{xdstart}		{	/* opening double quote of a delimited identifier */
+					SET_YYLLOC();
+					BEGIN(xd);		/* the <xd> rules collect the contents */
+					startlit();		/* reset the literal buffer to empty */
+				}
+<xd>{xdstop}	{	/* closing double quote: the identifier is complete */
+					char	   *ident;
+
+					BEGIN(INITIAL);
+					if (yyextra->literallen == 0)
+						yyerror("zero-length delimited identifier");
+					ident = litbufdup(yyscanner);	/* copied as typed, no downcasing */
+					if (yyextra->literallen >= NAMEDATALEN)
+						truncate_identifier(ident, yyextra->literallen, true);
+					yylval->str = ident;
+					return IDENT;	/* also reached for "" if yyerror() returns */
+				}
+<xd>{xddouble}	{
+					addlitchar('"', yyscanner);	/* "" adds a single double quote */
+				}
+<xd>{xdinside}	{
+					addlit(yytext, yyleng, yyscanner);	/* run of non-'"' chars */
+				}
+<xd><<EOF>>		{	/* yyerror() may return, and falling out of an <<EOF>> action loops */
+					yyerror("unterminated quoted identifier"); return '"'; /* stray quote: presumably a syntax error */ }
+{decimal}		{	/* {decimal} also matches plain digit strings */
+					SET_YYLLOC();
+					yylval->str = pstrdup(yytext);	/* keep the text, unconverted */
+					return FCONST;
+				}
+
+{identifier}	{
+					const sqlol_ScanKeyword *keyword;
+					char	   *ident;
+
+					SET_YYLLOC();
+
+					/* Is it in the keyword list given to sqlol_scanner_init()? */
+					keyword = sqlol_ScanKeywordLookup(yytext,
+													yyextra->keywords,
+													yyextra->num_keywords);
+					if (keyword != NULL)
+					{
+						yylval->keyword = keyword->name;
+						return keyword->value;	/* the matched entry's value */
+					}
+
+					/*
+					 * No.  Convert the identifier to lower case, and truncate
+					 * if necessary, then hand it over as a plain IDENT.
+					 */
+					ident = downcase_truncate_identifier(yytext, yyleng, true);
+					yylval->str = ident;
+					return IDENT;
+				}
+
+{other}			{	/* any other single character */
+					SET_YYLLOC();
+					return yytext[0];	/* the character itself is the token */
+				}
+
+<<EOF>>			{	/* end of input outside any quoted construct */
+					SET_YYLLOC();
+					yyterminate();
+				}
+
+%%
+
+/* LCOV_EXCL_STOP */
+
+/*
+ * Arrange access to yyextra for subroutines of the main yylex() function.
+ * We expect each subroutine to have a yyscanner parameter.  Rather than
+ * use the yyget_xxx functions, which might or might not get inlined by the
+ * compiler, we cheat just a bit and cast yyscanner to the right type.
+ */
+#undef yyextra
+#define yyextra  (((struct yyguts_t *) yyscanner)->yyextra_r)
+
+/* Likewise for the yylloc_r and yyleng_r members, which we also need. */
+#undef yylloc
+#define yylloc	(((struct yyguts_t *) yyscanner)->yylloc_r)
+#undef yyleng
+#define yyleng	(((struct yyguts_t *) yyscanner)->yyleng_r)
+
+
+/*
+ * sqlol_scanner_errposition
+ *		Attach an error cursor position to the error being reported.
+ *
+ * Meant to be called from inside an ereport() call.  "location" is a byte
+ * offset into the scan buffer; a negative value means "unknown" and makes
+ * this a no-op that returns 0.  Otherwise errposition()'s result is returned.
+ *
+ * This only works while the yyscanner struct is still available, i.e. for
+ * messages raised during raw parsing (essentially sqlol_scan.l, sqlol_gram.y).
+ */
+int
+sqlol_scanner_errposition(int location, sqlol_yyscan_t yyscanner)
+{
+	int			charpos;
+
+	/* nothing to report when the location is unknown */
+	if (location < 0)
+		return 0;
+
+	/* turn the byte offset into a character number, counting from one */
+	charpos = 1 + pg_mbstrlen_with_len(yyextra->scanbuf, location);
+	return errposition(charpos);
+}
+
+/*
+ * sqlol_scanner_yyerror
+ *		Lexer/grammar error hook; intentionally a no-op.
+ *
+ * The message is dropped since we'll fall back to raw_parser() anyway.
+ */
+void
+sqlol_scanner_yyerror(const char *message, sqlol_yyscan_t yyscanner)
+{
+	/* nothing to do; note that we return normally to the caller */
+}
+
+
+/*
+ * Create a scanner for "str", keeping its state in *yyext (call before parsing)
+ */
+sqlol_yyscan_t
+sqlol_scanner_init(const char *str,
+			 sqlol_yy_extra_type *yyext,
+			 const sqlol_ScanKeyword *keywords,
+			 int num_keywords)
+{
+	yyscan_t	handle;
+	Size		len = strlen(str);
+
+	if (yylex_init(&handle) != 0)
+		elog(ERROR, "yylex_init() failed: %m");
+
+	/* remember which keyword table the {identifier} rule should consult */
+	yyext->keywords = keywords;
+	yyext->num_keywords = num_keywords;
+	sqlol_yyset_extra(yyext, handle);
+
+	/*
+	 * Scan a private copy of the input ending in two end-of-buffer bytes.
+	 */
+	yyext->scanbuflen = len;
+	yyext->scanbuf = (char *) palloc(len + 2);
+	memcpy(yyext->scanbuf, str, len);
+	yyext->scanbuf[len] = yyext->scanbuf[len + 1] = YY_END_OF_BUFFER_CHAR;
+	yy_scan_buffer(yyext->scanbuf, len + 2, handle);
+
+	/* the literal buffer starts out empty, at a modest but growable size */
+	yyext->literalalloc = 1024;
+	yyext->literalbuf = (char *) palloc(yyext->literalalloc);
+	yyext->literallen = 0;
+
+	return handle;
+}
+
+
+/*
+ * Release what sqlol_scanner_init() allocated, once parsing is finished.
+ */
+void
+sqlol_scanner_finish(sqlol_yyscan_t yyscanner)
+{
+	/* arbitrary 8K cutoff below which a buffer is not worth freeing */
+	const int	min_free_size = 8192;
+
+	/*
+	 * yylex_destroy() is skipped on purpose: it would only pfree a little
+	 * control storage, which is cheaper to leak until the parsing context
+	 * goes away (and is usually dwarfed by the output parse tree anyway).
+	 * The scan and literal buffers are released only when they are big.
+	 */
+	if (yyextra->scanbuflen >= min_free_size)
+		pfree(yyextra->scanbuf);
+
+	if (yyextra->literalalloc >= min_free_size)
+		pfree(yyextra->literalbuf);
+}
+
+
+static void
+addlit(char *ytext, int yleng, sqlol_yyscan_t yyscanner)
+{
+	/* Append yleng bytes of ytext to the literal buffer, growing it if needed */
+	int			newlen = yyextra->literallen + yleng;
+
+	/* keep doubling until the data fits with at least one byte to spare */
+	if (newlen >= yyextra->literalalloc)
+	{
+		while (newlen >= yyextra->literalalloc)
+			yyextra->literalalloc *= 2;
+		yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf,
+												yyextra->literalalloc);
+	}
+	memcpy(yyextra->literalbuf + yyextra->literallen, ytext, yleng);
+	yyextra->literallen = newlen;
+}
+
+
+static void
+addlitchar(unsigned char ychar, sqlol_yyscan_t yyscanner)
+{
+	/* Append one byte to the literal buffer, doubling it when nearly full */
+	if (yyextra->literalalloc <= yyextra->literallen + 1)
+	{
+		yyextra->literalalloc *= 2;
+		yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf,
+												yyextra->literalalloc);
+	}
+
+	/* store the byte, then advance the length past it */
+	yyextra->literalbuf[yyextra->literallen++] = ychar;
+}
+
+
+/*
+ * Return a freshly palloc'd, null-terminated copy of the literal buffer.
+ */
+static char *
+litbufdup(sqlol_yyscan_t yyscanner)
+{
+	const int	nbytes = yyextra->literallen;
+	char	   *copy = palloc(nbytes + 1);
+
+	/* literalbuf is not necessarily null-terminated, so terminate the copy */
+	memcpy(copy, yyextra->literalbuf, nbytes);
+	copy[nbytes] = '\0';
+	return copy;
+}
+
+/*
+ * Interface functions to make flex use palloc() instead of malloc().
+ * It'd be better to make these static, but flex insists otherwise.
+ * The yyscanner argument is accepted but not used by any of them.
+ */
+void *
+sqlol_yyalloc(yy_size_t bytes, sqlol_yyscan_t yyscanner)
+{
+	return palloc(bytes);
+}
+
+void *
+sqlol_yyrealloc(void *ptr, yy_size_t bytes, sqlol_yyscan_t yyscanner)
+{
+	/* a NULL pointer means "allocate"; only real chunks go to repalloc() */
+	if (ptr == NULL)
+		return palloc(bytes);
+	return repalloc(ptr, bytes);
+}
+
+void
+sqlol_yyfree(void *ptr, sqlol_yyscan_t yyscanner)
+{
+	if (ptr != NULL)			/* silently accept a NULL pointer */
+		pfree(ptr);
+}
diff --git a/contrib/sqlol/sqlol_scanner.h b/contrib/sqlol/sqlol_scanner.h
new file mode 100644
index 0000000000..0a497e9d91
--- /dev/null
+++ b/contrib/sqlol/sqlol_scanner.h
@@ -0,0 +1,118 @@
+/*-------------------------------------------------------------------------
+ *
+ * sqlol_scanner.h
+ *		API for the sqlol scanner (flex machine)
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * contrib/sqlol/sqlol_scanner.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef SQLOL_SCANNER_H
+#define SQLOL_SCANNER_H
+
+#include "sqlol_keywords.h"
+
+/*
+ * The scanner returns extra data about scanned tokens in this union type.
+ * Note that this is a subset of the fields used in YYSTYPE of the bison
+ * parsers built atop the scanner.
+ */
+typedef union sqlol_YYSTYPE
+{
+	int			ival;			/* for integer literals; no scanner rule sets it */
+	char	   *str;			/* text of IDENT, FCONST and SCONST tokens */
+	const char *keyword;		/* canonical spelling of keywords */
+} sqlol_YYSTYPE;
+
+/*
+ * We track token locations in terms of byte offsets from the start of the
+ * source string, not the column number/line number representation that
+ * bison uses by default.  Also, to minimize overhead we track only one
+ * location (usually the first token location) for each construct, not
+ * the beginning and ending locations as bison does by default.  It's
+ * therefore sufficient to make YYLTYPE an int.
+ */
+#define YYLTYPE  int
+
+/*
+ * Another important component of the scanner's API is the token code numbers.
+ * However, those are not defined in this file, because bison insists on
+ * defining them for itself.  The token codes used by the core scanner are
+ * the ASCII characters plus these:
+ *	%token <str>	IDENT UIDENT FCONST SCONST USCONST BCONST XCONST Op
+ *	%token <ival>	ICONST PARAM
+ *	%token			TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
+ *	%token			LESS_EQUALS GREATER_EQUALS NOT_EQUALS
+ * The above token definitions *must* be the first ones declared in any
+ * bison parser built atop this scanner, so that they will have consistent
+ * numbers assigned to them (specifically, IDENT = 258 and so on).
+ */
+
+/*
+ * The YY_EXTRA data that a flex scanner allows us to pass around.
+ * Private state needed by the sqlol scanner goes here.  Note that the actual
+ * yy_extra struct may be larger and have this as its first component, thus
+ * allowing the calling parser to keep some fields of its own in YY_EXTRA.
+ */
+typedef struct sqlol_yy_extra_type
+{
+	/*
+	 * The string the scanner is physically scanning.  We keep this mainly so
+	 * that we can cheaply compute the offset of the current token (yytext).
+	 */
+	char	   *scanbuf;		/* palloc'd copy of the input string */
+	Size		scanbuflen;		/* input length, excluding the terminators */
+
+	/*
+	 * The keyword list to use, as handed to sqlol_scanner_init().
+	 */
+	const sqlol_ScanKeyword *keywords;
+	int		num_keywords;		/* passed along to sqlol_ScanKeywordLookup() */
+
+	/*
+	 * literalbuf is used to accumulate literal values when multiple rules are
+	 * needed to parse a single literal.  Call startlit() to reset buffer to
+	 * empty, addlit() to add text.  NOTE: the string in literalbuf is NOT
+	 * necessarily null-terminated, but there always IS room to add a trailing
+	 * null at offset literallen.  We store a null only when we need it.
+	 */
+	char	   *literalbuf;		/* palloc'd expandable buffer */
+	int			literallen;		/* actual current string length */
+	int			literalalloc;	/* current allocated buffer size */
+
+	/*
+	 * Random assorted scanner state.
+	 */
+	int			state_before_str_stop;	/* start cond. before end quote */
+	YYLTYPE		save_yylloc;	/* one-element stack for PUSH_YYLLOC() */
+
+	/* tested when a literal ends, to decide on calling pg_verifymbstr() */
+	bool		saw_non_ascii;
+} sqlol_yy_extra_type;
+
+/*
+ * The type of yyscanner is opaque outside sqlol_scan.l.
+ */
+typedef void *sqlol_yyscan_t;
+
+
+/* Constant data exported from parser/scan.l */
+extern PGDLLIMPORT const uint16 sqlol_ScanKeywordTokens[];
+
+/* Entry points in sqlol_scan.l */
+extern sqlol_yyscan_t sqlol_scanner_init(const char *str,
+								  sqlol_yy_extra_type *yyext,
+								  const sqlol_ScanKeyword *keywords,
+								  int num_keywords);
+extern void sqlol_scanner_finish(sqlol_yyscan_t yyscanner);
+extern int	sqlol_yylex(sqlol_YYSTYPE *lvalp, YYLTYPE *llocp,
+					   sqlol_yyscan_t yyscanner);
+extern int	sqlol_scanner_errposition(int location, sqlol_yyscan_t yyscanner);
+extern void sqlol_scanner_yyerror(const char *message, sqlol_yyscan_t yyscanner);
+
+#endif							/* SQLOL_SCANNER_H */
-- 
2.30.1

