From 71f435b2c9173ddb99a7ba89ce583bfff0c8a400 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horiguchi.kyotaro@lab.ntt.co.jp>
Date: Fri, 24 Jul 2015 10:58:23 +0900
Subject: [PATCH 2/4] Make use of psqlscan for parsing of custom script.

Use psqlscan instead of the home-made parser. This allows backslash
continuation for backslash commands, multi-line SQL statements, and
multiple SQL statements per line in custom scripts.
---
 src/bin/pgbench/Makefile  |  16 +-
 src/bin/pgbench/pgbench.c | 478 +++++++++++++++++++++++++++++++---------------
 2 files changed, 341 insertions(+), 153 deletions(-)

diff --git a/src/bin/pgbench/Makefile b/src/bin/pgbench/Makefile
index 18fdf58..a0a736b 100644
--- a/src/bin/pgbench/Makefile
+++ b/src/bin/pgbench/Makefile
@@ -5,11 +5,13 @@ PGAPPICON = win32
 
 subdir = src/bin/pgbench
 top_builddir = ../../..
+psqlincdir = ../psql
 include $(top_builddir)/src/Makefile.global
 
 OBJS = pgbench.o exprparse.o $(WIN32RES)
 
-override CPPFLAGS := -I. -I$(srcdir) -I$(libpq_srcdir) $(CPPFLAGS)
+
+override CPPFLAGS := -DOUTSIDE_PSQL -I. -I$(srcdir) -I$(libpq_srcdir) -I$(psqlincdir) $(CPPFLAGS)
 
 ifneq ($(PORTNAME), win32)
 override CFLAGS += $(PTHREAD_CFLAGS)
@@ -18,6 +20,16 @@ endif
 
 all: pgbench
 
+psqlscan.c: FLEXFLAGS = -Cfe -p -p
+psqlscan.c: FLEX_NO_BACKUP=yes
+
+psqlscan.l: % : $(top_srcdir)/src/bin/psql/%
+	 rm -f $@ && $(LN_S)  $< .
+
+psqlscan.c:  psqlscan.l
+
+pgbench.o: psqlscan.c
+
 pgbench: $(OBJS) | submake-libpq submake-libpgport
 	$(CC) $(CFLAGS) $^ $(libpq_pgport) $(PTHREAD_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
 
@@ -39,4 +51,4 @@ clean distclean:
 	rm -f pgbench$(X) $(OBJS)
 
 maintainer-clean: distclean
-	rm -f exprparse.c exprscan.c
+	rm -f exprparse.c exprscan.c psqlscan.l psqlscan.c
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 30e8d2a..b6fd399 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -54,6 +54,7 @@
 #endif
 
 #include "pgbench.h"
+#include "psqlscan.h"
 
 #define ERRCODE_UNDEFINED_TABLE  "42P01"
 
@@ -264,7 +265,7 @@ typedef enum QueryMode
 static QueryMode querymode = QUERY_SIMPLE;
 static const char *QUERYMODE[] = {"simple", "extended", "prepared"};
 
-typedef struct
+typedef struct Command_t
 {
 	char	   *line;			/* full text of command line */
 	int			command_num;	/* unique index of this Command struct */
@@ -273,6 +274,7 @@ typedef struct
 	char	   *argv[MAX_ARGS]; /* command word list */
 	int			cols[MAX_ARGS]; /* corresponding column starting from 1 */
 	PgBenchExpr *expr;			/* parsed expression */
+	struct Command_t *next;		/* more command if any, for multistatements */
 } Command;
 
 typedef struct
@@ -295,6 +297,21 @@ typedef struct
 	double		sum2_lag;		/* sum(lag*lag) */
 } AggVals;
 
+typedef enum
+{
+	PS_IDLE,				/* not in the middle of any command */
+	PS_IN_STATEMENT,		/* inside an SQL statement */
+	PS_IN_BACKSLASH_CMD		/* backslash command continues on the next line */
+} ParseState;
+
+typedef struct ParseInfo
+{
+	PsqlScanState	scan_state;	/* state handle passed to psql_scan */
+	PQExpBuffer		outbuf;		/* command text accumulated across lines */
+	ParseState		mode;		/* parse state carried over to the next line */
+} ParseInfoData;
+typedef ParseInfoData *ParseInfo;
+
 static Command **sql_files[MAX_FILES];	/* SQL script files */
 static int	num_files;			/* number of script files */
 static int	num_commands = 0;	/* total number of Command structs */
@@ -2224,217 +2241,348 @@ syntax_error(const char *source, const int lineno,
 	exit(1);
 }
 
-/* Parse a command; return a Command struct, or NULL if it's a comment */
+/* Allocate a ParseInfo, create its scan state and buffer, start in PS_IDLE */
+static ParseInfo
+createParseInfo(void)
+{
+	ParseInfo ret = (ParseInfo) pg_malloc(sizeof(ParseInfoData));
+	ret->scan_state = psql_scan_create();
+	ret->outbuf = createPQExpBuffer();
+	ret->mode = PS_IDLE;
+
+	return ret;
+}
+
+#define parse_reset_outbuf(pcs) resetPQExpBuffer((pcs)->outbuf)
+#define parse_finish_scan(pcs) psql_scan_finish((pcs)->scan_state)
+
+/* Copy "in", turning newlines into spaces and collapsing whitespace runs. */
+static char *
+strdup_nonl(const char *in)
+{
+	char	   *ret = pg_strdup(in);
+	char	   *p, *q;
+
+	/* Replace newlines with spaces */
+	for (p = ret; *p; p++)
+		if (*p == '\n')
+			*p = ' ';
+
+	/* Keep the last char of each whitespace run; ctype needs unsigned char */
+	for (p = q = ret; *p; p++, q++)
+	{
+		while (isspace((unsigned char) p[0]) && isspace((unsigned char) p[1]))
+			p++;
+		*q = *p;
+	}
+	*q = '\0';
+	return ret;
+}
+
+/* Parse a backslash command; return a Command struct  */
 static Command *
-process_commands(char *buf, const char *source, const int lineno)
+process_backslash_commands(ParseInfo proc_state, char *buf,
+						   const char *source, const int lineno)
 {
 	const char	delim[] = " \f\n\r\t\v";
 
 	Command    *my_commands;
 	int			j;
 	char	   *p,
+			   *start,
 			   *tok;
-
-	/* Make the string buf end at the next newline */
-	if ((p = strchr(buf, '\n')) != NULL)
-		*p = '\0';
+	int			max_args = -1;
 
 	/* Skip leading whitespace */
 	p = buf;
 	while (isspace((unsigned char) *p))
 		p++;
+	start = p;
+
+	if (proc_state->mode != PS_IN_BACKSLASH_CMD)
+	{
+		if (*p != '\\')
+			return NULL;
+
+		/* This is the first line of a backslash command  */
+		proc_state->mode = PS_IN_BACKSLASH_CMD;
+	}
+
+	/*
+	 * Make the string buf end at the next newline, or move to just after the
+	 * end of line
+	 */
+	if ((p = strchr(start, '\n')) != NULL)
+		*p = '\0';
+	else
+		p = start + strlen(start);
+
+	/* continued line ends with a backslash; an empty line never continues */
+	if (p > start && *(--p) == '\\')
+	{
+		*p = '\0';
+		appendPQExpBufferStr(proc_state->outbuf, start);
+
+		/* Add a delimiter unless the text before the backslash ends in one */
+		if (p == start || !isspace((unsigned char) p[-1]))
+			appendPQExpBufferChar(proc_state->outbuf, ' ');
 
-	/* If the line is empty or actually a comment, we're done */
-	if (*p == '\0' || strncmp(p, "--", 2) == 0)
 		return NULL;
+	}
+
+	appendPQExpBufferStr(proc_state->outbuf, start);
+	proc_state->mode = PS_IDLE;
+
+	/* Start parsing the backslash command */
+
+	p = proc_state->outbuf->data;
 
 	/* Allocate and initialize Command structure */
 	my_commands = (Command *) pg_malloc(sizeof(Command));
-	my_commands->line = pg_strdup(buf);
+	my_commands->line = pg_strdup(p);
 	my_commands->command_num = num_commands++;
-	my_commands->type = 0;		/* until set */
+	my_commands->type = META_COMMAND;
 	my_commands->argc = 0;
+	my_commands->next = NULL;
 
-	if (*p == '\\')
-	{
-		int			max_args = -1;
+	j = 0;
+	tok = strtok(++p, delim);
 
-		my_commands->type = META_COMMAND;
+	if (tok != NULL && pg_strcasecmp(tok, "set") == 0)
+		max_args = 2;
 
-		j = 0;
-		tok = strtok(++p, delim);
+	while (tok != NULL)
+	{
+		/* tok points into outbuf (the assembled command), not into buf */
+		my_commands->cols[j] = tok - proc_state->outbuf->data + 1;
+		my_commands->argv[j++] = pg_strdup(tok);
+		my_commands->argc++;
+		/* once max_args is reached, take the rest of the line as one token */
+		tok = strtok(NULL, (max_args >= 0 &&
+							my_commands->argc >= max_args) ? "" : delim);
+	}
+	parse_reset_outbuf(proc_state);
 
-		if (tok != NULL && pg_strcasecmp(tok, "set") == 0)
-			max_args = 2;
+	if (pg_strcasecmp(my_commands->argv[0], "setrandom") == 0)
+	{
+		/*
+		 * parsing: \setrandom variable min max [uniform] \setrandom
+		 * variable min max (gaussian|exponential) threshold
+		 */
 
-		while (tok != NULL)
+		if (my_commands->argc < 4)
 		{
-			my_commands->cols[j] = tok - buf + 1;
-			my_commands->argv[j++] = pg_strdup(tok);
-			my_commands->argc++;
-			if (max_args >= 0 && my_commands->argc >= max_args)
-				tok = strtok(NULL, "");
-			else
-				tok = strtok(NULL, delim);
+			syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
+						 "missing arguments", NULL, -1);
 		}
 
-		if (pg_strcasecmp(my_commands->argv[0], "setrandom") == 0)
-		{
-			/*
-			 * parsing: \setrandom variable min max [uniform] \setrandom
-			 * variable min max (gaussian|exponential) threshold
-			 */
+		/* argc >= 4 */
 
-			if (my_commands->argc < 4)
+		if (my_commands->argc == 4 ||		/* uniform without/with
+											 * "uniform" keyword */
+			(my_commands->argc == 5 &&
+			 pg_strcasecmp(my_commands->argv[4], "uniform") == 0))
+		{
+			/* nothing to do */
+		}
+		else if (			/* argc >= 5 */
+			(pg_strcasecmp(my_commands->argv[4], "gaussian") == 0) ||
+			(pg_strcasecmp(my_commands->argv[4], "exponential") == 0))
+		{
+			if (my_commands->argc < 6)
 			{
 				syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-							 "missing arguments", NULL, -1);
-			}
-
-			/* argc >= 4 */
-
-			if (my_commands->argc == 4 ||		/* uniform without/with
-												 * "uniform" keyword */
-				(my_commands->argc == 5 &&
-				 pg_strcasecmp(my_commands->argv[4], "uniform") == 0))
-			{
-				/* nothing to do */
+							 "missing threshold argument", my_commands->argv[4], -1);
 			}
-			else if (			/* argc >= 5 */
-					 (pg_strcasecmp(my_commands->argv[4], "gaussian") == 0) ||
-				   (pg_strcasecmp(my_commands->argv[4], "exponential") == 0))
-			{
-				if (my_commands->argc < 6)
-				{
-					syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-					 "missing threshold argument", my_commands->argv[4], -1);
-				}
-				else if (my_commands->argc > 6)
-				{
-					syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-								 "too many arguments", my_commands->argv[4],
-								 my_commands->cols[6]);
-				}
-			}
-			else	/* cannot parse, unexpected arguments */
+			else if (my_commands->argc > 6)
 			{
 				syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-							 "unexpected argument", my_commands->argv[4],
-							 my_commands->cols[4]);
+							 "too many arguments", my_commands->argv[4],
+							 my_commands->cols[6]);
 			}
 		}
-		else if (pg_strcasecmp(my_commands->argv[0], "set") == 0)
+		else	/* cannot parse, unexpected arguments */
 		{
-			if (my_commands->argc < 3)
-			{
-				syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-							 "missing argument", NULL, -1);
-			}
+			syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
+						 "unexpected argument", my_commands->argv[4],
+						 my_commands->cols[4]);
+		}
+	}
+	else if (pg_strcasecmp(my_commands->argv[0], "set") == 0)
+	{
+		if (my_commands->argc < 3)
+		{
+			syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
+						 "missing argument", NULL, -1);
+		}
 
-			expr_scanner_init(my_commands->argv[2], source, lineno,
-							  my_commands->line, my_commands->argv[0],
-							  my_commands->cols[2] - 1);
+		expr_scanner_init(my_commands->argv[2], source, lineno,
+						  my_commands->line, my_commands->argv[0],
+						  my_commands->cols[2] - 1);
 
-			if (expr_yyparse() != 0)
-			{
-				/* dead code: exit done from syntax_error called by yyerror */
-				exit(1);
-			}
+		if (expr_yyparse() != 0)
+		{
+			/* dead code: exit done from syntax_error called by yyerror */
+			exit(1);
+		}
 
-			my_commands->expr = expr_parse_result;
+		my_commands->expr = expr_parse_result;
 
-			expr_scanner_finish();
-		}
-		else if (pg_strcasecmp(my_commands->argv[0], "sleep") == 0)
+		expr_scanner_finish();
+	}
+	else if (pg_strcasecmp(my_commands->argv[0], "sleep") == 0)
+	{
+		if (my_commands->argc < 2)
 		{
-			if (my_commands->argc < 2)
-			{
-				syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-							 "missing argument", NULL, -1);
-			}
-
-			/*
-			 * Split argument into number and unit to allow "sleep 1ms" etc.
-			 * We don't have to terminate the number argument with null
-			 * because it will be parsed with atoi, which ignores trailing
-			 * non-digit characters.
-			 */
-			if (my_commands->argv[1][0] != ':')
-			{
-				char	   *c = my_commands->argv[1];
+			syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
+						 "missing argument", NULL, -1);
+		}
 
-				while (isdigit((unsigned char) *c))
-					c++;
-				if (*c)
-				{
-					my_commands->argv[2] = c;
-					if (my_commands->argc < 3)
-						my_commands->argc = 3;
-				}
-			}
+		/*
+		 * Split argument into number and unit to allow "sleep 1ms" etc.  We
+		 * don't have to terminate the number argument with null because it
+		 * will be parsed with atoi, which ignores trailing non-digit
+		 * characters.
+		 */
+		if (my_commands->argv[1][0] != ':')
+		{
+			char	   *c = my_commands->argv[1];
 
-			if (my_commands->argc >= 3)
+			while (isdigit((unsigned char) *c))
+				c++;
+			if (*c)
 			{
-				if (pg_strcasecmp(my_commands->argv[2], "us") != 0 &&
-					pg_strcasecmp(my_commands->argv[2], "ms") != 0 &&
-					pg_strcasecmp(my_commands->argv[2], "s") != 0)
-				{
-					syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-								 "unknown time unit, must be us, ms or s",
-								 my_commands->argv[2], my_commands->cols[2]);
-				}
+				my_commands->argv[2] = c;
+				if (my_commands->argc < 3)
+					my_commands->argc = 3;
 			}
-
-			/* this should be an error?! */
-			for (j = 3; j < my_commands->argc; j++)
-				fprintf(stderr, "%s: extra argument \"%s\" ignored\n",
-						my_commands->argv[0], my_commands->argv[j]);
 		}
-		else if (pg_strcasecmp(my_commands->argv[0], "setshell") == 0)
+
+		if (my_commands->argc >= 3)
 		{
-			if (my_commands->argc < 3)
+			if (pg_strcasecmp(my_commands->argv[2], "us") != 0 &&
+				pg_strcasecmp(my_commands->argv[2], "ms") != 0 &&
+				pg_strcasecmp(my_commands->argv[2], "s") != 0)
 			{
 				syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-							 "missing argument", NULL, -1);
+							 "unknown time unit, must be us, ms or s",
+							 my_commands->argv[2], my_commands->cols[2]);
 			}
 		}
-		else if (pg_strcasecmp(my_commands->argv[0], "shell") == 0)
+
+		/* this should be an error?! */
+		for (j = 3; j < my_commands->argc; j++)
+			fprintf(stderr, "%s: extra argument \"%s\" ignored\n",
+					my_commands->argv[0], my_commands->argv[j]);
+	}
+	else if (pg_strcasecmp(my_commands->argv[0], "setshell") == 0)
+	{
+		if (my_commands->argc < 3)
 		{
-			if (my_commands->argc < 1)
-			{
-				syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-							 "missing command", NULL, -1);
-			}
+			syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
+						 "missing argument", NULL, -1);
 		}
-		else
+	}
+	else if (pg_strcasecmp(my_commands->argv[0], "shell") == 0)
+	{
+		if (my_commands->argc < 1)
 		{
 			syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-						 "invalid command", NULL, -1);
+						 "missing command", NULL, -1);
 		}
 	}
 	else
 	{
-		my_commands->type = SQL_COMMAND;
+		syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
+					 "invalid command", NULL, -1);
+	}
+
+	return my_commands;
+}
+
+/* Parse an input line; return the commands completed on it, or NULL. */
+static Command *
+process_commands(ParseInfo proc_state, char *buf,
+				 const char *source, const int lineno)
+{
+	Command *command = NULL;
+	Command *retcomd = NULL;
+	PsqlScanState scan_state = proc_state->scan_state;
+	promptStatus_t prompt_status = PROMPT_READY; /* dummy  */
+	PQExpBuffer qbuf = proc_state->outbuf;
+	PsqlScanResult scan_result;
+
+	if (proc_state->mode != PS_IN_STATEMENT)
+	{
+		command = process_backslash_commands(proc_state, buf, source, lineno);
+
+		/* go to next line for continuation line of backslash command. */
+		if (command != NULL || proc_state->mode == PS_IN_BACKSLASH_CMD)
+			return command;
+	}
+
+	/* Parse statements */
+	psql_scan_setup(scan_state, buf, strlen(buf), NULL, NULL, 0);
+
+next_command:	
+	scan_result = psql_scan(scan_state, qbuf, &prompt_status);
+
+	if (scan_result == PSCAN_SEMICOLON)
+	{
+		proc_state->mode = PS_IDLE;
+		/*
+		 * Command is terminated. Fill the struct.
+		 */
+		command = (Command*) pg_malloc(sizeof(Command));
+		command->line = strdup_nonl(qbuf->data);
+		command->command_num = num_commands++;
+		command->type = SQL_COMMAND;
+		command->argc = 0;
+		command->next = NULL;
+
+		/* Put this command at the end of returning command chain */
+		if (!retcomd)
+			retcomd = command;
+		else
+		{
+			Command *pcomm = retcomd;
+			while (pcomm->next) pcomm = pcomm->next;
+			pcomm->next = command;
+		}
 
 		switch (querymode)
 		{
-			case QUERY_SIMPLE:
-				my_commands->argv[0] = pg_strdup(p);
-				my_commands->argc++;
-				break;
-			case QUERY_EXTENDED:
-			case QUERY_PREPARED:
-				if (!parseQuery(my_commands, p))
-					exit(1);
-				break;
-			default:
+		case QUERY_SIMPLE:
+			command->argv[0] = pg_strdup(qbuf->data);
+			command->argc++;
+			break;
+		case QUERY_EXTENDED:
+		case QUERY_PREPARED:
+			if (!parseQuery(command, qbuf->data))
 				exit(1);
+			break;
+		default:
+			exit(1);
 		}
+
+		parse_reset_outbuf(proc_state);
+
+		/* Ask for the next statement in this line */
+		goto next_command;
+	}
+	else if (scan_result == PSCAN_BACKSLASH)
+	{
+		fprintf(stderr, "Unexpected backslash in SQL statement: %s:%d\n", source, lineno);
+		exit(1);
 	}
 
-	return my_commands;
+	/* stay in a statement only if part of one is still pending in qbuf */
+	proc_state->mode = (qbuf->len > 0) ? PS_IN_STATEMENT : PS_IDLE;
+	psql_scan_finish(scan_state);
+	return retcomd;
 }
 
+
 /*
  * Read a line from fd, and return it in a malloc'd buffer.
  * Return NULL at EOF.
@@ -2489,6 +2637,7 @@ process_file(char *filename)
 				index;
 	char	   *buf;
 	int			alloc_num;
+	ParseInfo proc_state = createParseInfo();
 
 	if (num_files >= MAX_FILES)
 	{
@@ -2509,33 +2658,47 @@ process_file(char *filename)
 		return false;
 	}
 
+	proc_state->mode = PS_IDLE;
+
 	lineno = 0;
 	index = 0;
 
 	while ((buf = read_line_from_file(fd)) != NULL)
 	{
-		Command    *command;
+		Command *command = NULL;
 
 		lineno += 1;
 
-		command = process_commands(buf, filename, lineno);
-
+		command = process_commands(proc_state, buf, filename, lineno);
 		free(buf);
 
 		if (command == NULL)
+		{
+			/*
+			 * command is NULL when no command was completed on this line:
+			 * psql_scan returned PSCAN_EOL or PSCAN_INCOMPLETE, or a
+			 * backslash command is continued. Just fetch the next line.
+			 */
 			continue;
+		}
 
-		my_commands[index] = command;
-		index++;
+		while (command)
+		{
+			my_commands[index++] = command;
+			command = command->next;
 
-		if (index >= alloc_num)
-		{
-			alloc_num += COMMANDS_ALLOC_NUM;
-			my_commands = pg_realloc(my_commands, sizeof(Command *) * alloc_num);
+			/* grow now so the next entry and the final NULL always fit */
+			if (index >= alloc_num)
+			{
+				alloc_num += COMMANDS_ALLOC_NUM;
+				my_commands = pg_realloc(my_commands, sizeof(Command *) * alloc_num);
+			}
 		}
 	}
 	fclose(fd);
 
+	parse_finish_scan(proc_state);
+
 	my_commands[index] = NULL;
 
 	sql_files[num_files++] = my_commands;
@@ -2553,6 +2716,7 @@ process_builtin(char *tb, const char *source)
 				index;
 	char		buf[BUFSIZ];
 	int			alloc_num;
+	ParseInfo proc_state = createParseInfo();
 
 	alloc_num = COMMANDS_ALLOC_NUM;
 	my_commands = (Command **) pg_malloc(sizeof(Command *) * alloc_num);
@@ -2579,10 +2743,12 @@ process_builtin(char *tb, const char *source)
 
 		lineno += 1;
 
-		command = process_commands(buf, source, lineno);
+		command = process_commands(proc_state, buf, source, lineno);
 		if (command == NULL)
 			continue;
 
+		/* builtin doesn't need multistatements */
+		Assert(command->next == NULL);
 		my_commands[index] = command;
 		index++;
 
@@ -2594,6 +2760,7 @@ process_builtin(char *tb, const char *source)
 	}
 
 	my_commands[index] = NULL;
+	parse_finish_scan(proc_state);
 
 	return my_commands;
 }
@@ -3934,3 +4101,12 @@ pthread_join(pthread_t th, void **thread_return)
 }
 
 #endif   /* WIN32 */
+
+/*
+ * psqlscan.c is #include'd here instead of being compiled on its own.
+ * This is because we need postgres_fe.h to be read before any system
+ * include files, else things tend to break on platforms that have
+ * multiple infrastructures for stdio.h and so on.  flex is absolutely
+ * uncooperative about that, so we can't compile psqlscan.c on its own.
+ */
+#include "psqlscan.c"
-- 
1.8.3.1

