From e9d7b787cd3ce0579114558e27fcf5b6d4f3a22a Mon Sep 17 00:00:00 2001
From: "Chao Li (Evan)"
Date: Mon, 22 Jun 2026 15:33:04 +0800
Subject: [PATCH v1] psql: Fix CREATE SCHEMA scanning with object names like
 "begin"

Since d51697484, psql's scanner treats CREATE SCHEMA as a command that may
contain SQL-standard routine bodies, so that semicolons inside
BEGIN ATOMIC ... END blocks do not terminate the command too early.

However, the heuristic enabled BEGIN/END tracking for every CREATE SCHEMA
statement.  As a result, an object name such as "begin" could make
interactive psql think the statement was still incomplete, for example:

    CREATE SCHEMA s CREATE VIEW begin AS SELECT 1;

Fix this by enabling BEGIN/END tracking in CREATE SCHEMA only after
recognizing a nested CREATE [OR REPLACE] FUNCTION/PROCEDURE command at
top-level parenthesis depth.  Tracking is switched off again at the
routine's closing END or at the next top-level CREATE, so that a routine
without a BEGIN ATOMIC body does not leave it enabled for the subcommands
that follow.

Add interactive psql tests for the false-positive cases and for nested SQL
routine bodies inside CREATE SCHEMA.

Author: Chao Li
---
Note: hunk line counts and the diffstat were updated by hand for the
"next top-level CREATE" handling; the blob ids on the "index" lines still
describe the previous revision, so apply without --3way or regenerate the
patch with git format-patch.

 src/bin/psql/t/001_basic.pl         | 63 ++++++++++++++++++++
 src/fe_utils/psqlscan.l             | 92 +++++++++++++++++++++++++----
 src/include/fe_utils/psqlscan_int.h |  7 +++
 3 files changed, 152 insertions(+), 10 deletions(-)

diff --git a/src/bin/psql/t/001_basic.pl b/src/bin/psql/t/001_basic.pl
index bbd330216ae..d300ef10921 100644
--- a/src/bin/psql/t/001_basic.pl
+++ b/src/bin/psql/t/001_basic.pl
@@ -72,6 +72,69 @@ max_wal_senders = 4
 });
 $node->start;
 
+{
+	my $h = $node->interactive_psql('postgres');
+	$h->set_query_timer_restart();
+
+	my $out = $h->query_until(
+		qr/^CREATE SCHEMA\r?$/m,
+		"DROP SCHEMA IF EXISTS psql_scan_schema CASCADE;\n"
+		  . "CREATE SCHEMA psql_scan_schema CREATE VIEW begin AS SELECT 1;\n");
+	like(
+		$out,
+		qr/^CREATE SCHEMA\r?$/m,
+		'CREATE SCHEMA with object named begin ends at semicolon');
+
+	$out = $h->query_until(
+		qr/^CREATE SCHEMA\r?$/m,
+		"DROP SCHEMA psql_scan_schema CASCADE;\n"
+		  . "CREATE SCHEMA psql_scan_schema\n"
+		  . " CREATE VIEW v AS SELECT 1 AS function, 2 AS begin;\n");
+	like(
+		$out,
+		qr/^CREATE SCHEMA\r?$/m,
+		'CREATE SCHEMA with column alias function and begin ends at semicolon');
+
+	$out = $h->query_until(
+		qr/^CREATE SCHEMA\r?$/m,
+		"DROP SCHEMA psql_scan_schema CASCADE;\n"
+		  . "CREATE SCHEMA psql_scan_schema\n"
+		  . " CREATE FUNCTION f() RETURNS int LANGUAGE sql AS 'SELECT 1'\n"
+		  . " CREATE VIEW begin AS SELECT 1;\n");
+	like(
+		$out,
+		qr/^CREATE SCHEMA\r?$/m,
+		'CREATE SCHEMA with object named begin after routine without SQL body ends at semicolon');
+
+	$out = $h->query_until(
+		qr/^CREATE SCHEMA\r?$/m,
+		"DROP SCHEMA psql_scan_schema CASCADE;\n"
+		  . "CREATE SCHEMA psql_scan_schema\n"
+		  . " CREATE PROCEDURE p()\n"
+		  . " BEGIN ATOMIC\n"
+		  . " SELECT 1;\n"
+		  . " END;\n");
+	like(
+		$out,
+		qr/^CREATE SCHEMA\r?$/m,
+		'CREATE SCHEMA with SQL procedure body waits for END');
+
+	$out = $h->query_until(
+		qr/^CREATE SCHEMA\r?$/m,
+		"DROP SCHEMA psql_scan_schema CASCADE;\n"
+		  . "CREATE SCHEMA psql_scan_schema\n"
+		  . " CREATE OR REPLACE FUNCTION f() RETURNS int\n"
+		  . " BEGIN ATOMIC\n"
+		  . " SELECT 1;\n"
+		  . " END;\n");
+	like(
+		$out,
+		qr/^CREATE SCHEMA\r?$/m,
+		'CREATE SCHEMA with OR REPLACE SQL function body waits for END');
+
+	$h->quit or die "psql returned $?";
+}
+
 psql_like($node, '\copyright', qr/Copyright/, '\copyright');
 psql_like($node, '\help', qr/ALTER/, '\help without arguments');
 psql_like($node, '\help SELECT', qr/SELECT/, '\help with argument');
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index d29dda4d8e1..410eb657703 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -61,6 +61,25 @@ typedef int YYSTYPE;
 
 #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
 
+/*
+ * States for detecting nested CREATE [OR REPLACE] FUNCTION/PROCEDURE
+ * in CREATE SCHEMA.
+ */
+#define CS_BODY_NONE				0
+#define CS_BODY_CREATE				1
+#define CS_BODY_CREATE_OR			2
+#define CS_BODY_CREATE_OR_REPLACE	3
+
+/*
+ * Forget any nested routine recognized so far in CREATE SCHEMA.
+ */
+static void
+psqlscan_reset_create_schema_body(PsqlScanState state)
+{
+	state->create_schema_state = CS_BODY_NONE;
+	state->create_schema_routine_body = false;
+}
+
 %}
 
 %option reentrant
@@ -682,6 +701,7 @@ other			.
 						/* Terminate lexing temporarily */
 						cur_state->start_state = YY_START;
 						cur_state->identifier_count = 0;
+						psqlscan_reset_create_schema_body(cur_state);
 						return LEXRES_SEMI;
 					}
 				}
@@ -695,7 +715,10 @@ other			.
 					/* Force a semi-colon or colon into the query buffer */
 					psqlscan_emit(cur_state, yytext + 1, 1);
 					if (yytext[1] == ';')
+					{
 						cur_state->identifier_count = 0;
+						psqlscan_reset_create_schema_body(cur_state);
+					}
 				}
 
 "\\"			{
@@ -921,6 +944,9 @@ other			.
 
 
 {identifier}	{
+					bool		is_create_schema;
+					bool		track_begin_depth;
+
 					/*
 					 * We need to track if we are inside a BEGIN .. END block
 					 * in a function definition, so that semicolons contained
@@ -928,11 +954,9 @@ other			.
 					 * writing a full parser here, the following heuristic
 					 * should work.  First, we track whether the beginning of
 					 * the statement matches CREATE [OR REPLACE]
-					 * {FUNCTION|PROCEDURE|SCHEMA}.  (Allowing this in
-					 * CREATE SCHEMA, without tracking whether we're within a
-					 * CREATE FUNCTION/PROCEDURE subcommand, is a bit shaky
-					 * but should be okay with the present set of valid
-					 * subcommands.)
+					 * {FUNCTION|PROCEDURE}.  For CREATE SCHEMA, track the
+					 * BEGIN .. END block only after recognizing an embedded
+					 * CREATE [OR REPLACE] FUNCTION/PROCEDURE subcommand.
 					 */
 
 					if (cur_state->identifier_count == 0)
@@ -951,11 +975,55 @@ other			.
 
 					cur_state->identifier_count++;
 
-					if (cur_state->identifiers[0] == 'c' &&
-						(cur_state->identifiers[1] == 'f' || cur_state->identifiers[1] == 'p' ||
-						 (cur_state->identifiers[1] == 'o' && cur_state->identifiers[2] == 'r' &&
-						  (cur_state->identifiers[3] == 'f' || cur_state->identifiers[3] == 'p')) ||
-						 cur_state->identifiers[1] == 's') &&
+					is_create_schema =
+						cur_state->identifiers[0] == 'c' &&
+						cur_state->identifiers[1] == 's';
+
+					/*
+					 * In CREATE SCHEMA, recognize nested SQL routines only at
+					 * top-level paren depth.  This avoids treating
+					 * identifiers appearing in expressions as schema elements.
+					 * A top-level CREATE starts a new subcommand, so it also
+					 * forgets an earlier routine that had no BEGIN .. END body;
+					 * otherwise a later object named "begin" would still be
+					 * mistaken for the start of a block.
+					 */
+					if (is_create_schema &&
+						cur_state->paren_depth == 0 &&
+						cur_state->begin_depth == 0)
+					{
+						if (pg_strcasecmp(yytext, "create") == 0)
+						{
+							psqlscan_reset_create_schema_body(cur_state);
+							cur_state->create_schema_state = CS_BODY_CREATE;
+						}
+						else if (cur_state->create_schema_state == CS_BODY_CREATE &&
+								 pg_strcasecmp(yytext, "or") == 0)
+							cur_state->create_schema_state = CS_BODY_CREATE_OR;
+						else if (cur_state->create_schema_state == CS_BODY_CREATE_OR &&
+								 pg_strcasecmp(yytext, "replace") == 0)
+							cur_state->create_schema_state = CS_BODY_CREATE_OR_REPLACE;
+						else if ((cur_state->create_schema_state == CS_BODY_CREATE ||
+								  cur_state->create_schema_state == CS_BODY_CREATE_OR_REPLACE) &&
+								 (pg_strcasecmp(yytext, "function") == 0 ||
+								  pg_strcasecmp(yytext, "procedure") == 0))
+							cur_state->create_schema_routine_body = true;
+						else
+							cur_state->create_schema_state = CS_BODY_NONE;
+					}
+
+					track_begin_depth =
+						cur_state->identifiers[0] == 'c' &&
+						(cur_state->identifiers[1] == 'f' ||
+						 cur_state->identifiers[1] == 'p' ||
+						 (cur_state->identifiers[1] == 'o' &&
+						  cur_state->identifiers[2] == 'r' &&
+						  (cur_state->identifiers[3] == 'f' ||
+						   cur_state->identifiers[3] == 'p')) ||
+						 (is_create_schema &&
+						  cur_state->create_schema_routine_body));
+
+					if (track_begin_depth &&
 						cur_state->paren_depth == 0)
 					{
 						if (pg_strcasecmp(yytext, "begin") == 0)
@@ -973,6 +1041,9 @@ other			.
 						{
 							if (cur_state->begin_depth > 0)
 								cur_state->begin_depth--;
+							if (is_create_schema &&
+								cur_state->begin_depth == 0)
+								psqlscan_reset_create_schema_body(cur_state);
 						}
 					}
 
@@ -1294,6 +1365,7 @@ psql_scan_reset(PsqlScanState state)
 	state->dolqstart = NULL;
 	state->identifier_count = 0;
 	state->begin_depth = 0;
+	psqlscan_reset_create_schema_body(state);
 }
 
 /*
diff --git a/src/include/fe_utils/psqlscan_int.h b/src/include/fe_utils/psqlscan_int.h
index 488f416f0e5..4c2b4db07c4 100644
--- a/src/include/fe_utils/psqlscan_int.h
+++ b/src/include/fe_utils/psqlscan_int.h
@@ -121,6 +121,13 @@ typedef struct PsqlScanStateData
 	char		identifiers[4]; /* records the first few identifiers */
 	int			begin_depth;	/* depth of begin/end pairs */
 
+	/*
+	 * State to track CREATE SCHEMA's nested CREATE [OR REPLACE]
+	 * FUNCTION/PROCEDURE commands.
+	 */
+	int			create_schema_state;
+	bool		create_schema_routine_body; /* saw nested function/procedure */
+
 	/*
 	 * Callback functions provided by the program making use of the lexer,
 	 * plus a void* callback passthrough argument.
-- 
2.50.1 (Apple Git-155)