From 8a4fc0a11b63cf10b5b5f0784bf3cbff5c4aa97d Mon Sep 17 00:00:00 2001 From: "Chao Li (Evan)" Date: Mon, 22 Jun 2026 15:33:04 +0800 Subject: [PATCH v2] psql: Fix CREATE SCHEMA scanning with object names like "begin" Since d51697484, psql's scanner treats CREATE SCHEMA as a command that may contain SQL-standard routine bodies, so that semicolons inside BEGIN ATOMIC ... END blocks do not terminate the command too early. However, the heuristic enabled BEGIN/END tracking for every CREATE SCHEMA statement. As a result, an object name such as "begin" could make interactive psql think the statement was still incomplete, for example: CREATE SCHEMA s CREATE VIEW begin AS SELECT 1; Fix this by tracking identifiers from each top-level CREATE schema element separately while scanning CREATE SCHEMA, and applying the existing CREATE [OR REPLACE] FUNCTION/PROCEDURE heuristic to the current schema element rather than to the whole CREATE SCHEMA statement. This intentionally leaves the existing top-level CREATE FUNCTION/PROCEDURE heuristic unchanged. Add interactive psql tests for false-positive cases and for nested SQL routine bodies inside CREATE SCHEMA. Author: Chao Li Reviewed-by: Tom Lane Discussion: https://postgr.es/m/8E03BB8D-003D-4850-9772-5F8015A5A0C7@gmail.com --- src/bin/psql/t/001_basic.pl | 76 ++++++++++++ src/fe_utils/psqlscan.l | 173 +++++++++++++++++++--------- src/include/fe_utils/psqlscan_int.h | 5 + 3 files changed, 199 insertions(+), 55 deletions(-) diff --git a/src/bin/psql/t/001_basic.pl b/src/bin/psql/t/001_basic.pl index bbd330216ae..c7dd50bafd6 100644 --- a/src/bin/psql/t/001_basic.pl +++ b/src/bin/psql/t/001_basic.pl @@ -72,6 +72,82 @@ max_wal_senders = 4 }); $node->start; +{ + my $h = $node->interactive_psql('postgres'); + $h->set_query_timer_restart(); + + my $out = $h->query_until( + qr/^CREATE SCHEMA\r?$/m, + "DROP SCHEMA IF EXISTS psql_scan_schema CASCADE;\n" + . "CREATE SCHEMA psql_scan_schema CREATE VIEW begin AS SELECT 1;\n"); + like( + $out, + qr/^CREATE SCHEMA\r?$/m, + 'CREATE SCHEMA with object named begin ends at semicolon'); + + $out = $h->query_until( + qr/^CREATE SCHEMA\r?$/m, + "DROP SCHEMA psql_scan_schema CASCADE;\n" + . "CREATE SCHEMA psql_scan_schema\n" + . " CREATE VIEW v AS SELECT 1 AS function, 2 AS begin;\n"); + like( + $out, + qr/^CREATE SCHEMA\r?$/m, + 'CREATE SCHEMA with column alias function and begin ends at semicolon'); + + $out = $h->query_until( + qr/^CREATE SCHEMA\r?$/m, + "DROP SCHEMA psql_scan_schema CASCADE;\n" + . "CREATE SCHEMA psql_scan_schema\n" + . " CREATE FUNCTION f() RETURNS int LANGUAGE sql AS \$\$ SELECT 1; \$\$\n" + . " CREATE VIEW begin AS SELECT 1;\n"); + like( + $out, + qr/^CREATE SCHEMA\r?$/m, + 'CREATE SCHEMA with SQL function and later object named begin ends at semicolon'); + + $out = $h->query_until( + qr/^CREATE SCHEMA\r?$/m, + "DROP SCHEMA psql_scan_schema CASCADE;\n" + . "CREATE SCHEMA psql_scan_schema\n" + . " CREATE PROCEDURE p()\n" + . " BEGIN ATOMIC\n" + . " SELECT 1;\n" + . " END;\n"); + like( + $out, + qr/^CREATE SCHEMA\r?$/m, + 'CREATE SCHEMA with SQL procedure body waits for END'); + + $out = $h->query_until( + qr/^CREATE SCHEMA\r?$/m, + "DROP SCHEMA psql_scan_schema CASCADE;\n" + . "CREATE SCHEMA psql_scan_schema\n" + . " CREATE FUNCTION f_case() RETURNS int\n" + . " BEGIN ATOMIC\n" + . " RETURN CASE WHEN true THEN 1 ELSE 0 END;\n" + . " END;\n"); + like( + $out, + qr/^CREATE SCHEMA\r?$/m, + 'CREATE SCHEMA with SQL function body containing CASE waits for routine END'); + + $out = $h->query_until( + qr/^CREATE SCHEMA\r?$/m, + "DROP SCHEMA psql_scan_schema CASCADE;\n" + . "CREATE SCHEMA psql_scan_schema\n" + . " CREATE OR REPLACE FUNCTION f() RETURNS int\n" + . " BEGIN ATOMIC\n" + . " SELECT 1;\n" + . " END;\n"); + like( + $out, + qr/^CREATE SCHEMA\r?$/m, + 'CREATE SCHEMA with OR REPLACE SQL function body waits for END'); + + $h->quit or die "psql returned $?"; +} + psql_like($node, '\copyright', qr/Copyright/, '\copyright'); psql_like($node, '\help', qr/ALTER/, '\help without arguments'); psql_like($node, '\help SELECT', qr/SELECT/, '\help with argument'); diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l index d29dda4d8e1..722924688c6 100644 --- a/src/fe_utils/psqlscan.l +++ b/src/fe_utils/psqlscan.l @@ -61,6 +61,122 @@ typedef int YYSTYPE; #define ECHO psqlscan_emit(cur_state, yytext, yyleng) +static void +psqlscan_reset_create_schema_identifiers(PsqlScanState state) +{ + memset(state->create_schema_identifiers, 0, + sizeof(state->create_schema_identifiers)); + state->create_schema_identifier_count = 0; +} + +static void +psqlscan_record_create_keyword(char *identifiers, int *identifier_count, + int identifiers_size, const char *identifier) +{ + if (*identifier_count < identifiers_size) + { + /* + * "schema" is needed for recognizing top-level CREATE SCHEMA. It is + * harmless in the CREATE SCHEMA subcommand buffer. + */ + if (pg_strcasecmp(identifier, "create") == 0 || + pg_strcasecmp(identifier, "function") == 0 || + pg_strcasecmp(identifier, "procedure") == 0 || + pg_strcasecmp(identifier, "or") == 0 || + pg_strcasecmp(identifier, "replace") == 0 || + pg_strcasecmp(identifier, "schema") == 0) + identifiers[*identifier_count] = + pg_tolower((unsigned char) identifier[0]); + } + + (*identifier_count)++; +} + +static bool +psqlscan_is_create_routine(const char *identifiers) +{ + return identifiers[0] == 'c' && + (identifiers[1] == 'f' || identifiers[1] == 'p' || + (identifiers[1] == 'o' && identifiers[2] == 'r' && + (identifiers[3] == 'f' || identifiers[3] == 'p'))); +} + +/* + * Track whether we are inside a BEGIN .. END block in a function definition, + * so that semicolons contained therein don't terminate the whole statement. + * Short of writing a full parser here, the following heuristic should work. + * We track whether the beginning of the statement matches CREATE [OR REPLACE] + * {FUNCTION|PROCEDURE}. For CREATE SCHEMA, track BEGIN .. END blocks only + * after recognizing an embedded CREATE [OR REPLACE] FUNCTION/PROCEDURE + * subcommand. + */ +static void +psqlscan_track_identifier(PsqlScanState state, const char *identifier) +{ + bool is_create_schema; + bool track_begin_depth; + + if (state->identifier_count == 0) + { + memset(state->identifiers, 0, sizeof(state->identifiers)); + psqlscan_reset_create_schema_identifiers(state); + } + + psqlscan_record_create_keyword(state->identifiers, + &state->identifier_count, + sizeof(state->identifiers), + identifier); + + is_create_schema = + state->identifiers[0] == 'c' && + state->identifiers[1] == 's'; + + /* + * In CREATE SCHEMA, track identifiers from each top-level CREATE schema + * element separately, so that BEGIN/END tracking is enabled only for nested + * CREATE [OR REPLACE] FUNCTION/PROCEDURE elements. This avoids treating + * identifiers appearing in expressions as schema elements. + */ + if (is_create_schema && + state->paren_depth == 0 && + state->begin_depth == 0) + { + if (pg_strcasecmp(identifier, "create") == 0) + psqlscan_reset_create_schema_identifiers(state); + + psqlscan_record_create_keyword(state->create_schema_identifiers, + &state->create_schema_identifier_count, + sizeof(state->create_schema_identifiers), + identifier); + } + + track_begin_depth = + psqlscan_is_create_routine(state->identifiers) || + (is_create_schema && + psqlscan_is_create_routine(state->create_schema_identifiers)); + + if (track_begin_depth && + state->paren_depth == 0) + { + if (pg_strcasecmp(identifier, "begin") == 0) + state->begin_depth++; + else if (pg_strcasecmp(identifier, "case") == 0) + { + /* + * CASE also ends with END. We only need to track this if we are + * already inside a BEGIN. + */ + if (state->begin_depth >= 1) + state->begin_depth++; + } + else if (pg_strcasecmp(identifier, "end") == 0) + { + if (state->begin_depth > 0) + state->begin_depth--; + } + } +} + %} %option reentrant @@ -921,61 +1037,7 @@ other . {identifier} { - /* - * We need to track if we are inside a BEGIN .. END block - * in a function definition, so that semicolons contained - * therein don't terminate the whole statement. Short of - * writing a full parser here, the following heuristic - * should work. First, we track whether the beginning of - * the statement matches CREATE [OR REPLACE] - * {FUNCTION|PROCEDURE|SCHEMA}. (Allowing this in - * CREATE SCHEMA, without tracking whether we're within a - * CREATE FUNCTION/PROCEDURE subcommand, is a bit shaky - * but should be okay with the present set of valid - * subcommands.) - */ - - if (cur_state->identifier_count == 0) - memset(cur_state->identifiers, 0, sizeof(cur_state->identifiers)); - - if (cur_state->identifier_count < sizeof(cur_state->identifiers)) - { - if (pg_strcasecmp(yytext, "create") == 0 || - pg_strcasecmp(yytext, "function") == 0 || - pg_strcasecmp(yytext, "procedure") == 0 || - pg_strcasecmp(yytext, "or") == 0 || - pg_strcasecmp(yytext, "replace") == 0 || - pg_strcasecmp(yytext, "schema") == 0) - cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned char) yytext[0]); - } - - cur_state->identifier_count++; - - if (cur_state->identifiers[0] == 'c' && - (cur_state->identifiers[1] == 'f' || cur_state->identifiers[1] == 'p' || - (cur_state->identifiers[1] == 'o' && cur_state->identifiers[2] == 'r' && - (cur_state->identifiers[3] == 'f' || cur_state->identifiers[3] == 'p')) || - cur_state->identifiers[1] == 's') && - cur_state->paren_depth == 0) - { - if (pg_strcasecmp(yytext, "begin") == 0) - cur_state->begin_depth++; - else if (pg_strcasecmp(yytext, "case") == 0) - { - /* - * CASE also ends with END. We only need to track - * this if we are already inside a BEGIN. - */ - if (cur_state->begin_depth >= 1) - cur_state->begin_depth++; - } - else if (pg_strcasecmp(yytext, "end") == 0) - { - if (cur_state->begin_depth > 0) - cur_state->begin_depth--; - } - } - + psqlscan_track_identifier(cur_state, yytext); ECHO; } @@ -1294,6 +1356,7 @@ psql_scan_reset(PsqlScanState state) state->dolqstart = NULL; state->identifier_count = 0; state->begin_depth = 0; + psqlscan_reset_create_schema_identifiers(state); } /* diff --git a/src/include/fe_utils/psqlscan_int.h b/src/include/fe_utils/psqlscan_int.h index 488f416f0e5..0d8b9034028 100644 --- a/src/include/fe_utils/psqlscan_int.h +++ b/src/include/fe_utils/psqlscan_int.h @@ -121,6 +121,11 @@ typedef struct PsqlScanStateData char identifiers[4]; /* records the first few identifiers */ int begin_depth; /* depth of begin/end pairs */ + int create_schema_identifier_count; /* first few identifiers of a + * CREATE SCHEMA element */ + char create_schema_identifiers[4]; /* count for + * create_schema_identifiers */ + /* * Callback functions provided by the program making use of the lexer, * plus a void* callback passthrough argument. -- 2.50.1 (Apple Git-155)