From 350f3234f318059826a2b2bd96e91e4d7fa22253 Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Tue, 3 Mar 2026 17:06:59 +0900 Subject: [PATCH 3/8] Expand RPR test coverage and improve test comments diff --git a/src/test/regress/expected/rpr.out b/src/test/regress/expected/rpr.out index 8af44392700..35b90a04492 100644 --- a/src/test/regress/expected/rpr.out +++ b/src/test/regress/expected/rpr.out @@ -1,5 +1,10 @@ -- --- Test for row pattern definition clause +-- Test for row pattern recognition: WINDOW clause integration and +-- real-world scenario tests using stock price data. +-- +-- Parser/planner tests: rpr_base.sql +-- NFA engine tests: rpr_nfa.sql +-- EXPLAIN statistics tests: rpr_explain.sql -- CREATE TEMP TABLE stock ( company TEXT, @@ -51,6 +56,9 @@ SELECT * FROM stock; company2 | 07-10-2023 | 1300 (20 rows) +-- +-- Basic pattern matching with PREV/NEXT +-- -- basic test using PREV SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, nth_value(tdate, 2) OVER w AS nth_second @@ -426,7 +434,7 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER company2 | 07-10-2023 | 1300 | | (20 rows) --- basic test with none-greedy pattern +-- basic test with fixed-length pattern (A A A = exactly 3) SELECT company, tdate, price, count(*) OVER w FROM stock WINDOW w AS ( @@ -752,7 +760,9 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER company2 | 07-10-2023 | 1300 | | (20 rows) --- using AFTER MATCH SKIP TO NEXT ROW +-- using AFTER MATCH SKIP TO NEXT ROW (same pattern as above; +-- match length is always 2, so result is identical to SKIP PAST LAST ROW. +-- SKIP TO NEXT ROW's distinct effect is tested in backtracking section.) SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w FROM stock WINDOW w AS ( @@ -789,6 +799,179 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER company2 | 07-10-2023 | 1300 | | (20 rows) +-- PREV returns NULL at partition's first row (null_slot path) +SELECT company, tdate, price, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (BOUNDARY REST+) + DEFINE + BOUNDARY AS PREV(price) IS NULL, + REST AS PREV(price) IS NOT NULL +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 10 + company1 | 07-02-2023 | 200 | 0 + company1 | 07-03-2023 | 150 | 0 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 0 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 10 + company2 | 07-02-2023 | 2000 | 0 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 0 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + +-- NEXT returns NULL at partition's last row (null_slot path) +SELECT company, tdate, price, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ BOUNDARY) + DEFINE + A AS NEXT(price) IS NOT NULL, + BOUNDARY AS NEXT(price) IS NULL +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 10 + company1 | 07-02-2023 | 200 | 0 + company1 | 07-03-2023 | 150 | 0 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 0 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 10 + company2 | 07-02-2023 | 2000 | 0 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 0 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + +-- DESC order: PREV refers to the row with later date +SELECT company, tdate, price, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate DESC + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START DOWN+ UP+) + DEFINE + START AS TRUE, + DOWN AS price < PREV(price), + UP AS price > PREV(price) +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-10-2023 | 130 | 3 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-07-2023 | 110 | 3 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-03-2023 | 150 | 0 + company1 | 07-02-2023 | 200 | 0 + company1 | 07-01-2023 | 100 | 0 + company2 | 07-10-2023 | 1300 | 3 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-07-2023 | 1100 | 3 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-02-2023 | 2000 | 0 + company2 | 07-01-2023 | 50 | 0 +(20 rows) + +-- Multiple partitions with unequal sizes +WITH multi_part AS ( + SELECT * FROM (VALUES + ('a', 1, 10), ('a', 2, 20), ('a', 3, 15), + ('b', 1, 5), + ('c', 1, 100), ('c', 2, 200), ('c', 3, 150), ('c', 4, 140), ('c', 5, 300) + ) AS t(grp, id, val) +) +SELECT grp, id, val, count(*) OVER w +FROM multi_part +WINDOW w AS ( + PARTITION BY grp + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + A AS val <= NEXT(val), + B AS val > PREV(val) OR val < PREV(val) +); + grp | id | val | count +-----+----+-----+------- + a | 1 | 10 | 3 + a | 2 | 20 | 0 + a | 3 | 15 | 0 + b | 1 | 5 | 0 + c | 1 | 100 | 5 + c | 2 | 200 | 0 + c | 3 | 150 | 0 + c | 4 | 140 | 0 + c | 5 | 300 | 0 +(9 rows) + +-- FLOAT/NUMERIC DEFINE conditions +WITH float_data AS ( + SELECT * FROM (VALUES + (1, 1.0::float8), (2, 1.5), (3, 1.4999), (4, 1.50001), (5, 0.1) + ) AS t(id, val) +) +SELECT id, val, count(*) OVER w +FROM float_data +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + A AS TRUE, + B AS val > PREV(val) * 0.99 +); + id | val | count +----+---------+------- + 1 | 1 | 4 + 2 | 1.5 | 0 + 3 | 1.4999 | 0 + 4 | 1.50001 | 0 + 5 | 0.1 | 0 +(5 rows) + +-- +-- SKIP TO / Backtracking / Frame boundary +-- -- match everything SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w FROM stock @@ -1128,6 +1311,47 @@ DOWN AS price < PREV(price) company2 | 07-10-2023 | 1300 | | | | | | | 0 (20 rows) +-- row_number() within RPR reduced frame +SELECT company, tdate, price, row_number() OVER w, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | row_number | count +----------+------------+-------+------------+------- + company1 | 07-01-2023 | 100 | 1 | 4 + company1 | 07-02-2023 | 200 | 2 | 0 + company1 | 07-03-2023 | 150 | 3 | 0 + company1 | 07-04-2023 | 140 | 4 | 0 + company1 | 07-05-2023 | 150 | 5 | 0 + company1 | 07-06-2023 | 90 | 6 | 4 + company1 | 07-07-2023 | 110 | 7 | 0 + company1 | 07-08-2023 | 130 | 8 | 0 + company1 | 07-09-2023 | 120 | 9 | 0 + company1 | 07-10-2023 | 130 | 10 | 0 + company2 | 07-01-2023 | 50 | 1 | 4 + company2 | 07-02-2023 | 2000 | 2 | 0 + company2 | 07-03-2023 | 1500 | 3 | 0 + company2 | 07-04-2023 | 1400 | 4 | 0 + company2 | 07-05-2023 | 1500 | 5 | 0 + company2 | 07-06-2023 | 60 | 6 | 4 + company2 | 07-07-2023 | 1100 | 7 | 0 + company2 | 07-08-2023 | 1300 | 8 | 0 + company2 | 07-09-2023 | 1200 | 9 | 0 + company2 | 07-10-2023 | 1300 | 10 | 0 +(20 rows) + +-- +-- SQL Integration: JOIN, CTE, LATERAL +-- -- JOIN case CREATE TEMP TABLE t1 (i int, v1 int); CREATE TEMP TABLE t2 (j int, v2 int); @@ -1262,6 +1486,9 @@ SELECT id, i, j, count(*) OVER w 1 | 10 | 20 | 0 (10 rows) +-- +-- Large-scale / scalability tests +-- -- Smoke test for larger partitions. WITH s AS ( SELECT v, count(*) OVER w AS c @@ -1329,7 +1556,7 @@ SELECT match_first, match_last, match_len FROM result WHERE match_len > 0; (1 row) -- --- Using IGNORE NULLS +-- IGNORE NULLS -- -- no NULL rows case. The result should be identical with "basic test using PREV" SELECT company, tdate, price, first_value(price) IGNORE NULLS OVER w, @@ -1371,7 +1598,7 @@ SELECT company, tdate, price, first_value(price) IGNORE NULLS OVER w, (20 rows) -- nth_value with IGNORE NULLS option wants to find the second row but --- due a NULL in the midlle, it returns the third row. +-- due to a NULL in the middle, it returns the third row. WITH data AS ( SELECT * FROM (VALUES (10, 1), (11, NULL), (12, 3), (13, 4) @@ -1395,8 +1622,8 @@ WITH data AS ( (4 rows) -- nth_value with IGNORE NULLS option wants to find the third row but --- due a NULL in the midlle, it reaches the end of reduced frame and --- return NULL +-- due to a NULL in the middle, it reaches the end of reduced frame and +-- returns NULL WITH data AS ( SELECT * FROM (VALUES (10, 1), (11, NULL), (12, 3), (13, 4) @@ -1459,2555 +1686,156 @@ WINDOW w AS ( company2 | 07-10-2023 | 1300 | (20 rows) --- View and pg_get_viewdef tests. -CREATE TEMP VIEW v_window AS -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, - nth_value(tdate, 2) OVER w AS nth_second - FROM stock - WINDOW w AS ( - PARTITION BY company +-- IGNORE NULLS + first_value where first value in reduced frame is NULL +WITH data AS ( + SELECT * FROM (VALUES + (1, NULL), (2, NULL), (3, 30), (4, 40) + ) AS t(id, val)) +SELECT id, val, + first_value(val) IGNORE NULLS OVER w AS fv_ignull, + count(*) OVER w +FROM data +WINDOW w AS ( + ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (START UP+ DOWN+) - DEFINE - START AS TRUE, - UP AS price > PREV(price), - DOWN AS price < PREV(price) -); -SELECT * FROM v_window; - company | tdate | price | first_value | last_value | nth_second -----------+------------+-------+-------------+------------+------------ - company1 | 07-01-2023 | 100 | 100 | 140 | 07-02-2023 - company1 | 07-02-2023 | 200 | | | - company1 | 07-03-2023 | 150 | | | - company1 | 07-04-2023 | 140 | | | - company1 | 07-05-2023 | 150 | | | - company1 | 07-06-2023 | 90 | 90 | 120 | 07-07-2023 - company1 | 07-07-2023 | 110 | | | - company1 | 07-08-2023 | 130 | | | - company1 | 07-09-2023 | 120 | | | - company1 | 07-10-2023 | 130 | | | - company2 | 07-01-2023 | 50 | 50 | 1400 | 07-02-2023 - company2 | 07-02-2023 | 2000 | | | - company2 | 07-03-2023 | 1500 | | | - company2 | 07-04-2023 | 1400 | | | - company2 | 07-05-2023 | 1500 | | | - company2 | 07-06-2023 | 60 | 60 | 1200 | 07-07-2023 - company2 | 07-07-2023 | 1100 | | | - company2 | 07-08-2023 | 1300 | | | - company2 | 07-09-2023 | 1200 | | | - company2 | 07-10-2023 | 1300 | | | -(20 rows) - -SELECT pg_get_viewdef('v_window'); - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - first_value(price) OVER w AS first_value, + - last_value(price) OVER w AS last_value, + - nth_value(tdate, 2) OVER w AS nth_second + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (start up+ down+) + - DEFINE + - start AS true, + - up AS (price > prev(price)), + - down AS (price < prev(price)) ); -(1 row) + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS TRUE +); + id | val | fv_ignull | count +----+-----+-----------+------- + 1 | | 30 | 4 + 2 | | | 0 + 3 | 30 | | 0 + 4 | 40 | | 0 +(4 rows) --- --- Pattern optimization tests --- VIEW shows original pattern, EXPLAIN shows optimized pattern --- --- Test: duplicate alternatives removal (A | B | A)+ -> (A | B)+ -CREATE TEMP VIEW v_opt_dup AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company +-- IGNORE NULLS + all values NULL in reduced frame +WITH data AS ( + SELECT * FROM (VALUES + (1, NULL), (2, NULL), (3, NULL) + ) AS t(id, val)) +SELECT id, val, + first_value(val) IGNORE NULLS OVER w AS fv_ignull, + last_value(val) IGNORE NULLS OVER w AS lv_ignull, + count(*) OVER w +FROM data +WINDOW w AS ( + ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A | B | A)+) - DEFINE - A AS price > 100, - B AS price <= 100 + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS TRUE ); -SELECT pg_get_viewdef('v_opt_dup'); -- original: ((a | b | a)+) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN ((a | b | a)+) + - DEFINE + - a AS (price > 100), + - b AS (price <= 100) ); -(1 row) + id | val | fv_ignull | lv_ignull | count +----+-----+-----------+-----------+------- + 1 | | | | 3 + 2 | | | | 0 + 3 | | | | 0 +(3 rows) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_dup; -- optimized: ((a | b)+) - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_dup - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: (a | b)+ - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) +-- +-- last_value IGNORE NULLS with reduced frame containing all NULLs +-- Exercises ignorenulls_getfuncarginframe SEEK_TAIL out-of-frame path +-- when notnull_relpos >= num_reduced_frame. +-- +CREATE TEMP TABLE rpr_nullval (id INT, val INT); +INSERT INTO rpr_nullval VALUES (1, 10), (2, NULL), (3, NULL), (4, 20); +SELECT id, val, + last_value(val) IGNORE NULLS OVER w AS lv_ignull, + count(*) OVER w AS cnt +FROM rpr_nullval +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + A AS val IS NOT NULL, + B AS val IS NULL +); + id | val | lv_ignull | cnt +----+-----+-----------+----- + 1 | 10 | 10 | 3 + 2 | | | 0 + 3 | | | 0 + 4 | 20 | | 0 +(4 rows) --- Test: duplicate group removal ((A | B)+ | (A | B)+) -> (A | B)+ -CREATE TEMP VIEW v_opt_dup_group AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A | B)+ | (A | B)+) - DEFINE - A AS price > 100, - B AS price <= 100 +-- +-- NULL handling +-- +CREATE TEMP TABLE stock_null (company TEXT, tdate DATE, price INTEGER); +INSERT INTO stock_null VALUES ('c1', '2023-07-01', 100); +INSERT INTO stock_null VALUES ('c1', '2023-07-02', NULL); -- NULL in middle +INSERT INTO stock_null VALUES ('c1', '2023-07-03', 200); +INSERT INTO stock_null VALUES ('c1', '2023-07-04', 150); +SELECT company, tdate, price, count(*) OVER w AS match_count +FROM stock_null +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (START UP DOWN) + DEFINE START AS TRUE, UP AS price > PREV(price), DOWN AS price < +PREV(price) ); -SELECT pg_get_viewdef('v_opt_dup_group'); -- original: ((a | b)+ | (a | b)+) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN ((a | b)+ | (a | b)+) + - DEFINE + - a AS (price > 100), + - b AS (price <= 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_dup_group; -- optimized: ((a | b)+) - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_dup_group - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: (a | b)+ - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) + company | tdate | price | match_count +---------+------------+-------+------------- + c1 | 07-01-2023 | 100 | 0 + c1 | 07-02-2023 | | 0 + c1 | 07-03-2023 | 200 | 0 + c1 | 07-04-2023 | 150 | 0 +(4 rows) --- Test: consecutive vars merge (A A A) -> A{3} -CREATE TEMP VIEW v_opt_merge AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company +-- Consecutive NULLs: PREV navigates through NULL values +CREATE TEMP TABLE rpr_consec_null (id INT, val INT); +INSERT INTO rpr_consec_null VALUES + (1, 100), (2, NULL), (3, NULL), (4, NULL), (5, 200), (6, 300); +-- PREV(val) IS NULL succeeds for both null_slot (first row) and actual NULL +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_consec_null +WINDOW w AS ( + ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A A A) - DEFINE - A AS price >= 140 AND price <= 150 -); -SELECT pg_get_viewdef('v_opt_merge'); -- original: (a a a) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (a a a) + - DEFINE + - a AS ((price >= 140) AND (price <= 150)) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge; -- optimized: a{3} - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_merge - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: a{3} - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+ C) + DEFINE + A AS val IS NULL, + B AS val IS NULL AND PREV(val) IS NULL, + C AS val IS NOT NULL +); + id | val | cnt +----+-----+----- + 1 | 100 | 0 + 2 | | 4 + 3 | | 0 + 4 | | 0 + 5 | 200 | 0 + 6 | 300 | 0 +(6 rows) --- Test: quantified vars merge (A A+ A) -> A{3,} -CREATE TEMP VIEW v_opt_merge_quant AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company +-- NEXT(val) through consecutive NULLs +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_consec_null +WINDOW w AS ( + ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A A+ A) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_merge_quant'); -- original: (a a+ a) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (a a+ a) + - DEFINE + - a AS (price > 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_quant; -- optimized: a{3,} - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_merge_quant - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: a{3,}" - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: merge two unbounded (A+ A+) -> A{2,} -CREATE TEMP VIEW v_opt_merge_unbounded AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A+ A+) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_merge_unbounded'); -- original: (a+ a+) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (a+ a+) + - DEFINE + - a AS (price > 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_unbounded; -- optimized: a{2,} - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_merge_unbounded - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: a{2,}" - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: merge with zero-min (A* A+) -> A+ -CREATE TEMP VIEW v_opt_merge_star AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A* A+) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_merge_star'); -- original: (a* a+) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (a* a+) + - DEFINE + - a AS (price > 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_star; -- optimized: a+ - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_merge_star - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: a+" - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: complex merge (A A{2} A+ A{3}) -> A{7,} -CREATE TEMP VIEW v_opt_merge_complex AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A A{2} A+ A{3}) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_merge_complex'); -- original: (a a{2} a+ a{3}) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (a a{2} a+ a{3}) + - DEFINE + - a AS (price > 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_complex; -- optimized: a{7,} - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_merge_complex - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: a{7,}" - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: group merge ((A B) (A B)+) -> (A B){2,} -CREATE TEMP VIEW v_opt_merge_group AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A B) (A B)+) - DEFINE - A AS price > 100, - B AS price <= 100 -); -SELECT pg_get_viewdef('v_opt_merge_group'); -- original: ((a b) (a b)+) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN ((a b) (a b)+) + - DEFINE + - a AS (price > 100), + - b AS (price <= 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group; -- expected: (a b){2,} - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_merge_group - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: (a' b'){2,}" - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: group merge A B (A B)+ -> (A B){2,} -CREATE TEMP VIEW v_opt_merge_group2 AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A B (A B)+) - DEFINE - A AS price > 100, - B AS price <= 100 -); -SELECT pg_get_viewdef('v_opt_merge_group2'); -- original: (a b (a b)+) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (a b (a b)+) + - DEFINE + - a AS (price > 100), + - b AS (price <= 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group2; -- expected: (a b){2,} - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_merge_group2 - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: (a' b'){2,}" - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: group merge (A B) (A B)+ (A B) -> (A B){3,} -CREATE TEMP VIEW v_opt_merge_group3 AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A B) (A B)+ (A B)) - DEFINE - A AS price > 100, - B AS price <= 100 -); -SELECT pg_get_viewdef('v_opt_merge_group3'); -- original: ((a b) (a b)+ (a b)) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN ((a b) (a b)+ (a b)) + - DEFINE + - a AS (price > 100), + - b AS (price <= 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group3; -- expected: (a b){3,} - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_merge_group3 - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: (a' b'){3,}" - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: group merge A B A B (A B)+ A B A B -> (A B){5,} -CREATE TEMP VIEW v_opt_merge_group4 AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A B A B (A B)+ A B A B) - DEFINE - A AS price > 100, - B AS price <= 100 -); -SELECT pg_get_viewdef('v_opt_merge_group4'); -- original: (a b a b (a b)+ a b a b) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (a b a b (a b)+ a b a b) + - DEFINE + - a AS (price > 100), + - b AS (price <= 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group4; -- expected: (a b){5,} - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_merge_group4 - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: (a' b'){5,}" - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: group merge C A B (A B)+ A B C -> C (A B){3,} C -CREATE TEMP VIEW v_opt_merge_group5 AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (C A B (A B)+ A B C) - DEFINE - A AS price > 100, - B AS price <= 100, - C AS price > 200 -); -SELECT pg_get_viewdef('v_opt_merge_group5'); -- original: (c a b (a b)+ a b c) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (c a b (a b)+ a b c) + - DEFINE + - a AS (price > 100), + - b AS (price <= 100), + - c AS (price > 200) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group5; -- expected: c (a b){3,} c - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_merge_group5 - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: c (a b){3,} c - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: consecutive GROUP merge (A B)+ (A B)+ -> (A B){2,} -CREATE TEMP VIEW v_opt_merge_consec_group AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A B)+ (A B)+) - DEFINE - A AS price > 100, - B AS price <= 100 -); -SELECT pg_get_viewdef('v_opt_merge_consec_group'); -- original: ((a b)+ (a b)+) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN ((a b)+ (a b)+) + - DEFINE + - a AS (price > 100), + - b AS (price <= 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_consec_group; -- expected: (a b){2,} - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_merge_consec_group - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: (a' b'){2,}" - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: consecutive GROUP merge with different quantifiers (A B){2} (A B){3} -> (A B){5} -CREATE TEMP VIEW v_opt_merge_consec_group2 AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A B){2} (A B){3}) - DEFINE - A AS price > 100, - B AS price <= 100 -); -SELECT pg_get_viewdef('v_opt_merge_consec_group2'); -- original: ((a b){2} (a b){3}) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN ((a b){2} (a b){3}) + - DEFINE + - a AS (price > 100), + - b AS (price <= 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_consec_group2; -- expected: (a b){5} - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_merge_consec_group2 - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: (a b){5} - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test {n} quantifier display -CREATE TEMP VIEW v_quantifier_n AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A{3}) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_quantifier_n'); - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (a{3}) + - DEFINE + - a AS (price > 100) ); -(1 row) - --- Test {n,} quantifier display -CREATE TEMP VIEW v_quantifier_n_plus AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A{2,}) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_quantifier_n_plus'); - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (a{2,}) + - DEFINE + - a AS (price > 100) ); -(1 row) - --- Test: flatten nested SEQ (A (B C)) -> A B C -CREATE TEMP VIEW v_opt_flatten_seq AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A (B C)) - DEFINE - A AS price > 100, - B AS price > 150, - C AS price < 150 -); -SELECT pg_get_viewdef('v_opt_flatten_seq'); -- original: (a (b c)) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (a (b c)) + - DEFINE + - a AS (price > 100), + - b AS (price > 150), + - c AS (price < 150) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_flatten_seq; -- optimized: a b c - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_flatten_seq - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: a b c - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: flatten nested ALT (A | (B | C)) -> (A | B | C) -CREATE TEMP VIEW v_opt_flatten_alt AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A | (B | C))+) - DEFINE - A AS price > 200, - B AS price > 100, - C AS price <= 100 -); -SELECT pg_get_viewdef('v_opt_flatten_alt'); -- original: ((a | (b | c))+) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN ((a | (b | c))+) + - DEFINE + - a AS (price > 200), + - b AS (price > 100), + - c AS (price <= 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_flatten_alt; -- optimized: ((a | b | c))+ - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_flatten_alt - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: (a | b | c)+ - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: unwrap GROUP{1,1} ((A)) -> A -CREATE TEMP VIEW v_opt_unwrap_group AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (((A))) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_unwrap_group'); -- original: (((a))) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (((a))) + - DEFINE + - a AS (price > 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_unwrap_group; -- optimized: a - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_unwrap_group - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: a - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: quantifier multiplication (A{2}){3} -> A{6} -CREATE TEMP VIEW v_opt_quant_mult AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A{2}){3}) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_quant_mult'); -- original: ((a{2}){3}) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN ((a{2}){3}) + - DEFINE + - a AS (price > 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult; -- optimized: a{6} - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_quant_mult - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: a{6} - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: quantifier multiplication (A{2,4}){3} -> A{6,12} -CREATE TEMP VIEW v_opt_quant_mult_range AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A{2,4}){3}) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_quant_mult_range'); -- original: ((a{2,4}){3}) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN ((a{2,4}){3}) + - DEFINE + - a AS (price > 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult_range; -- optimized: a{6,12} - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_quant_mult_range - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: a{6,12} - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: quantifier multiplication blocked (A{2}){3,5} -> no change --- outer range with child exact > 1 causes gaps (6,8,10 not 6,7,8,9,10) -CREATE TEMP VIEW v_opt_quant_mult_range2 AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A{2}){3,5}) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_quant_mult_range2'); -- original: ((a{2}){3,5}) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN ((a{2}){3,5}) + - DEFINE + - a AS (price > 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult_range2; -- NOT optimized: (a{2}){3,5} - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_quant_mult_range2 - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: (a{2}){3,5} - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: quantifier multiplication blocked by INF (A+){3} -> no change -CREATE TEMP VIEW v_opt_quant_mult_inf AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A+){3}) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_quant_mult_inf'); -- original: ((a+){3}) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN ((a+){3}) + - DEFINE + - a AS (price > 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult_inf; -- no multiply: (a+){3} - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_quant_mult_inf - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: (a+"){3} - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: unwrap single-item ALT after duplicate removal (A | A) -> A -CREATE TEMP VIEW v_opt_unwrap_alt AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A | A)+) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_unwrap_alt'); -- original: ((a | a)+) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN ((a | a)+) + - DEFINE + - a AS (price > 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_unwrap_alt; -- optimized: a+ - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_unwrap_alt - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: a+" - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: GROUP{1,1} to SEQ with flatten ((A B)(C D)) -> A B C D -CREATE TEMP VIEW v_opt_group_to_seq AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (((A B)(C D))) - DEFINE - A AS price > 200, - B AS price > 150, - C AS price > 100, - D AS price <= 100 -); -SELECT pg_get_viewdef('v_opt_group_to_seq'); -- original: (((a b)(c d))) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (((a b) (c d))) + - DEFINE + - a AS (price > 200), + - b AS (price > 150), + - c AS (price > 100), + - d AS (price <= 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_group_to_seq; -- optimized: a b c d - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_group_to_seq - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: a b c d - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: combined consecutive GROUP + prefix merge A B (A B)+ (A B)+ -> (A B){3,} -CREATE TEMP VIEW v_opt_combined_merge AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A B (A B)+ (A B)+) - DEFINE - A AS price > 100, - B AS price <= 100 -); -SELECT pg_get_viewdef('v_opt_combined_merge'); -- original: (a b (a b)+ (a b)+) - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (a b (a b)+ (a b)+) + - DEFINE + - a AS (price > 100), + - b AS (price <= 100) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_combined_merge; -- expected: (a b){3,} - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_combined_merge - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: (a' b'){3,}" - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: nested ALT pattern - bug reproduction -CREATE TEMP VIEW v_opt_nested_alt AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (((A B) | C) D | A B C) - DEFINE - A AS price <= 100, - B AS price <= 150, - C AS price <= 200, - D AS price > 200 -); -SELECT pg_get_viewdef('v_opt_nested_alt'); - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (((a b) | c) d | a b c) + - DEFINE + - a AS (price <= 100), + - b AS (price <= 150), + - c AS (price <= 200), + - d AS (price > 200) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_nested_alt; - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_nested_alt - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: ((a b | c) d | a b c) - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- Test: nested ALT with unbounded - A+ inside -CREATE TEMP VIEW v_opt_nested_alt2 AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (((A+ B) | C) D | A B C) - DEFINE - A AS price <= 100, - B AS price <= 150, - C AS price <= 200, - D AS price > 200 -); -SELECT pg_get_viewdef('v_opt_nested_alt2'); - pg_get_viewdef ---------------------------------------------------------------------------------------- - SELECT company, + - tdate, + - price, + - count(*) OVER w AS count + - FROM stock + - WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + - AFTER MATCH SKIP PAST LAST ROW + - INITIAL + - PATTERN (((a+ b) | c) d | a b c) + - DEFINE + - a AS (price <= 100), + - b AS (price <= 150), + - c AS (price <= 200), + - d AS (price > 200) ); -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_nested_alt2; - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Subquery Scan on v_opt_nested_alt2 - -> WindowAgg - Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: ((a+" b | c) d | a b c) - -> Sort - Sort Key: stock.company - -> Seq Scan on stock -(7 rows) - --- --- Error cases --- --- row pattern definition variable name must not appear more than once -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (START UP+ DOWN+) - DEFINE - START AS TRUE, - UP AS price > PREV(price), - DOWN AS price < PREV(price), - UP AS price > PREV(price) -); -ERROR: row pattern definition variable name "up" appears more than once in DEFINE clause -LINE 11: UP AS price > PREV(price), - ^ --- subqueries in DEFINE clause are not supported -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (START LOWPRICE) - DEFINE - START AS TRUE, - LOWPRICE AS price < (SELECT 100) -); -ERROR: cannot use subquery in DEFINE expression -LINE 11: LOWPRICE AS price < (SELECT 100) - ^ --- aggregates in DEFINE clause are not supported -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (START LOWPRICE) - DEFINE - START AS TRUE, - LOWPRICE AS price < count(*) -); -ERROR: aggregate functions are not allowed in DEFINE -LINE 11: LOWPRICE AS price < count(*) - ^ --- FRAME must start at current row when row pattern recognition is used -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (START UP+ DOWN+) - DEFINE - START AS TRUE, - UP AS price > PREV(price), - DOWN AS price < PREV(price) -); -ERROR: FRAME must start at CURRENT ROW when row pattern recognition is used -LINE 6: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ^ -DETAIL: Current frame starts with UNBOUNDED PRECEDING. -HINT: Use: ROWS BETWEEN CURRENT ROW AND ... --- EXCLUDE is not permitted -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - EXCLUDE CURRENT ROW - INITIAL - PATTERN (START UP+ DOWN+) - DEFINE - START AS TRUE, - UP AS price > PREV(price), - DOWN AS price < PREV(price) -); -ERROR: EXCLUDE options are not permitted when row pattern recognition is used -LINE 7: EXCLUDE CURRENT ROW - ^ -DETAIL: Frame definition includes EXCLUDE CURRENT ROW. -HINT: Remove the EXCLUDE clause from the window definition. --- SEEK is not supported -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - SEEK - PATTERN (START UP+ DOWN+) - DEFINE - START AS TRUE, - UP AS price > PREV(price), - DOWN AS price < PREV(price) -); -ERROR: SEEK is not supported -LINE 8: SEEK - ^ -HINT: Use INITIAL instead. --- PREV's argument must have at least 1 column reference -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - INITIAL - PATTERN (START UP+ DOWN+) - DEFINE - START AS TRUE, - UP AS price > PREV(1), - DOWN AS price < PREV(1) -); -ERROR: row pattern navigation operation's argument must include at least one column reference --- Unsupported quantifier -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - INITIAL - PATTERN (START UP~ DOWN+) - DEFINE - START AS TRUE, - UP AS price > PREV(1), - DOWN AS price < PREV(1) -); -ERROR: unsupported quantifier "~" -LINE 9: PATTERN (START UP~ DOWN+) - ^ -HINT: Valid quantifiers are: *, +, ?, *?, +?, ??, {n}, {n,}, {,m}, {n,m} and their reluctant versions. --- PREV(1) missing column reference (error) -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - INITIAL - PATTERN (START UP+? DOWN+) - DEFINE - START AS TRUE, - UP AS price > PREV(1), - DOWN AS price < PREV(1) -); -ERROR: row pattern navigation operation's argument must include at least one column reference --- Maximum pattern variables is 251 (RPR_VARID_MAX) --- Error: 252 variables exceeds limit of 251 -DO $$ -DECLARE - pattern_vars text; - define_vars text; - query text; -BEGIN - SELECT string_agg('v' || lpad(i::text, 3, '0'), ' '), - string_agg('v' || lpad(i::text, 3, '0') || ' AS TRUE', ', ') - INTO pattern_vars, define_vars - FROM generate_series(1, 252) i; - - query := format('SELECT * FROM (SELECT 1 AS x) t WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - PATTERN (%s) - DEFINE %s)', pattern_vars, define_vars); - - EXECUTE query; -END; -$$; -ERROR: too many pattern variables -DETAIL: Maximum is 251. -CONTEXT: SQL statement "SELECT * FROM (SELECT 1 AS x) t WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - PATTERN (v001 v002 v003 v004 v005 v006 v007 v008 v009 v010 v011 v012 v013 v014 v015 v016 v017 v018 v019 v020 v021 v022 v023 v024 v025 v026 v027 v028 v029 v030 v031 v032 v033 v034 v035 v036 v037 v038 v039 v040 v041 v042 v043 v044 v045 v046 v047 v048 v049 v050 v051 v052 v053 v054 v055 v056 v057 v058 v059 v060 v061 v062 v063 v064 v065 v066 v067 v068 v069 v070 v071 v072 v073 v074 v075 v076 v077 v078 v079 v080 v081 v082 v083 v084 v085 v086 v087 v088 v089 v090 v091 v092 v093 v094 v095 v096 v097 v098 v099 v100 v101 v102 v103 v104 v105 v106 v107 v108 v109 v110 v111 v112 v113 v114 v115 v116 v117 v118 v119 v120 v121 v122 v123 v124 v125 v126 v127 v128 v129 v130 v131 v132 v133 v134 v135 v136 v137 v138 v139 v140 v141 v142 v143 v144 v145 v146 v147 v148 v149 v150 v151 v152 v153 v154 v155 v156 v157 v158 v159 v160 v161 v162 v163 v164 v165 v166 v167 v168 v169 v170 v171 v172 v173 v174 v175 v176 v177 v178 v179 v180 v181 v182 v183 v184 v185 v186 v187 v188 v189 v190 v191 v192 v193 v194 v195 v196 v197 v198 v199 v200 v201 v202 v203 v204 v205 v206 v207 v208 v209 v210 v211 v212 v213 v214 v215 v216 v217 v218 v219 v220 v221 v222 v223 v224 v225 v226 v227 v228 v229 v230 v231 v232 v233 v234 v235 v236 v237 v238 v239 v240 v241 v242 v243 v244 v245 v246 v247 v248 v249 v250 v251 v252) - DEFINE v001 AS TRUE, v002 AS TRUE, v003 AS TRUE, v004 AS TRUE, v005 AS TRUE, v006 AS TRUE, v007 AS TRUE, v008 AS TRUE, v009 AS TRUE, v010 AS TRUE, v011 AS TRUE, v012 AS TRUE, v013 AS TRUE, v014 AS TRUE, v015 AS TRUE, v016 AS TRUE, v017 AS TRUE, v018 AS TRUE, v019 AS TRUE, v020 AS TRUE, v021 AS TRUE, v022 AS TRUE, v023 AS TRUE, v024 AS TRUE, v025 AS TRUE, v026 AS TRUE, v027 AS TRUE, v028 AS TRUE, v029 AS TRUE, v030 AS TRUE, v031 AS TRUE, v032 AS TRUE, v033 AS TRUE, v034 AS TRUE, v035 AS TRUE, v036 AS TRUE, v037 AS TRUE, v038 AS TRUE, v039 AS TRUE, v040 AS TRUE, v041 AS TRUE, v042 AS TRUE, v043 AS TRUE, v044 AS TRUE, v045 AS TRUE, v046 AS TRUE, v047 AS TRUE, v048 AS TRUE, v049 AS TRUE, v050 AS TRUE, v051 AS TRUE, v052 AS TRUE, v053 AS TRUE, v054 AS TRUE, v055 AS TRUE, v056 AS TRUE, v057 AS TRUE, v058 AS TRUE, v059 AS TRUE, v060 AS TRUE, v061 AS TRUE, v062 AS TRUE, v063 AS TRUE, v064 AS TRUE, v065 AS TRUE, v066 AS TRUE, v067 AS TRUE, v068 AS TRUE, v069 AS TRUE, v070 AS TRUE, v071 AS TRUE, v072 AS TRUE, v073 AS TRUE, v074 AS TRUE, v075 AS TRUE, v076 AS TRUE, v077 AS TRUE, v078 AS TRUE, v079 AS TRUE, v080 AS TRUE, v081 AS TRUE, v082 AS TRUE, v083 AS TRUE, v084 AS TRUE, v085 AS TRUE, v086 AS TRUE, v087 AS TRUE, v088 AS TRUE, v089 AS TRUE, v090 AS TRUE, v091 AS TRUE, v092 AS TRUE, v093 AS TRUE, v094 AS TRUE, v095 AS TRUE, v096 AS TRUE, v097 AS TRUE, v098 AS TRUE, v099 AS TRUE, v100 AS TRUE, v101 AS TRUE, v102 AS TRUE, v103 AS TRUE, v104 AS TRUE, v105 AS TRUE, v106 AS TRUE, v107 AS TRUE, v108 AS TRUE, v109 AS TRUE, v110 AS TRUE, v111 AS TRUE, v112 AS TRUE, v113 AS TRUE, v114 AS TRUE, v115 AS TRUE, v116 AS TRUE, v117 AS TRUE, v118 AS TRUE, v119 AS TRUE, v120 AS TRUE, v121 AS TRUE, v122 AS TRUE, v123 AS TRUE, v124 AS TRUE, v125 AS TRUE, v126 AS TRUE, v127 AS TRUE, v128 AS TRUE, v129 AS TRUE, v130 AS TRUE, v131 AS TRUE, v132 AS TRUE, v133 AS TRUE, v134 AS TRUE, v135 AS TRUE, v136 AS TRUE, v137 AS TRUE, v138 AS TRUE, v139 AS TRUE, v140 AS TRUE, v141 AS TRUE, v142 AS TRUE, v143 AS TRUE, v144 AS TRUE, v145 AS TRUE, v146 AS TRUE, v147 AS TRUE, v148 AS TRUE, v149 AS TRUE, v150 AS TRUE, v151 AS TRUE, v152 AS TRUE, v153 AS TRUE, v154 AS TRUE, v155 AS TRUE, v156 AS TRUE, v157 AS TRUE, v158 AS TRUE, v159 AS TRUE, v160 AS TRUE, v161 AS TRUE, v162 AS TRUE, v163 AS TRUE, v164 AS TRUE, v165 AS TRUE, v166 AS TRUE, v167 AS TRUE, v168 AS TRUE, v169 AS TRUE, v170 AS TRUE, v171 AS TRUE, v172 AS TRUE, v173 AS TRUE, v174 AS TRUE, v175 AS TRUE, v176 AS TRUE, v177 AS TRUE, v178 AS TRUE, v179 AS TRUE, v180 AS TRUE, v181 AS TRUE, v182 AS TRUE, v183 AS TRUE, v184 AS TRUE, v185 AS TRUE, v186 AS TRUE, v187 AS TRUE, v188 AS TRUE, v189 AS TRUE, v190 AS TRUE, v191 AS TRUE, v192 AS TRUE, v193 AS TRUE, v194 AS TRUE, v195 AS TRUE, v196 AS TRUE, v197 AS TRUE, v198 AS TRUE, v199 AS TRUE, v200 AS TRUE, v201 AS TRUE, v202 AS TRUE, v203 AS TRUE, v204 AS TRUE, v205 AS TRUE, v206 AS TRUE, v207 AS TRUE, v208 AS TRUE, v209 AS TRUE, v210 AS TRUE, v211 AS TRUE, v212 AS TRUE, v213 AS TRUE, v214 AS TRUE, v215 AS TRUE, v216 AS TRUE, v217 AS TRUE, v218 AS TRUE, v219 AS TRUE, v220 AS TRUE, v221 AS TRUE, v222 AS TRUE, v223 AS TRUE, v224 AS TRUE, v225 AS TRUE, v226 AS TRUE, v227 AS TRUE, v228 AS TRUE, v229 AS TRUE, v230 AS TRUE, v231 AS TRUE, v232 AS TRUE, v233 AS TRUE, v234 AS TRUE, v235 AS TRUE, v236 AS TRUE, v237 AS TRUE, v238 AS TRUE, v239 AS TRUE, v240 AS TRUE, v241 AS TRUE, v242 AS TRUE, v243 AS TRUE, v244 AS TRUE, v245 AS TRUE, v246 AS TRUE, v247 AS TRUE, v248 AS TRUE, v249 AS TRUE, v250 AS TRUE, v251 AS TRUE, v252 AS TRUE)" -PL/pgSQL function inline_code_block line 17 at EXECUTE --- Error: 253 variables exceeds limit of 251 -DO $$ -DECLARE - pattern_vars text; - define_vars text; - query text; -BEGIN - SELECT string_agg('v' || lpad(i::text, 3, '0'), ' '), - string_agg('v' || lpad(i::text, 3, '0') || ' AS TRUE', ', ') - INTO pattern_vars, define_vars - FROM generate_series(1, 253) i; - - query := format('SELECT * FROM (SELECT 1 AS x) t WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - PATTERN (%s) - DEFINE %s)', pattern_vars, define_vars); - - EXECUTE query; -END; -$$; -ERROR: too many pattern variables -DETAIL: Maximum is 251. -CONTEXT: SQL statement "SELECT * FROM (SELECT 1 AS x) t WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - PATTERN (v001 v002 v003 v004 v005 v006 v007 v008 v009 v010 v011 v012 v013 v014 v015 v016 v017 v018 v019 v020 v021 v022 v023 v024 v025 v026 v027 v028 v029 v030 v031 v032 v033 v034 v035 v036 v037 v038 v039 v040 v041 v042 v043 v044 v045 v046 v047 v048 v049 v050 v051 v052 v053 v054 v055 v056 v057 v058 v059 v060 v061 v062 v063 v064 v065 v066 v067 v068 v069 v070 v071 v072 v073 v074 v075 v076 v077 v078 v079 v080 v081 v082 v083 v084 v085 v086 v087 v088 v089 v090 v091 v092 v093 v094 v095 v096 v097 v098 v099 v100 v101 v102 v103 v104 v105 v106 v107 v108 v109 v110 v111 v112 v113 v114 v115 v116 v117 v118 v119 v120 v121 v122 v123 v124 v125 v126 v127 v128 v129 v130 v131 v132 v133 v134 v135 v136 v137 v138 v139 v140 v141 v142 v143 v144 v145 v146 v147 v148 v149 v150 v151 v152 v153 v154 v155 v156 v157 v158 v159 v160 v161 v162 v163 v164 v165 v166 v167 v168 v169 v170 v171 v172 v173 v174 v175 v176 v177 v178 v179 v180 v181 v182 v183 v184 v185 v186 v187 v188 v189 v190 v191 v192 v193 v194 v195 v196 v197 v198 v199 v200 v201 v202 v203 v204 v205 v206 v207 v208 v209 v210 v211 v212 v213 v214 v215 v216 v217 v218 v219 v220 v221 v222 v223 v224 v225 v226 v227 v228 v229 v230 v231 v232 v233 v234 v235 v236 v237 v238 v239 v240 v241 v242 v243 v244 v245 v246 v247 v248 v249 v250 v251 v252 v253) - DEFINE v001 AS TRUE, v002 AS TRUE, v003 AS TRUE, v004 AS TRUE, v005 AS TRUE, v006 AS TRUE, v007 AS TRUE, v008 AS TRUE, v009 AS TRUE, v010 AS TRUE, v011 AS TRUE, v012 AS TRUE, v013 AS TRUE, v014 AS TRUE, v015 AS TRUE, v016 AS TRUE, v017 AS TRUE, v018 AS TRUE, v019 AS TRUE, v020 AS TRUE, v021 AS TRUE, v022 AS TRUE, v023 AS TRUE, v024 AS TRUE, v025 AS TRUE, v026 AS TRUE, v027 AS TRUE, v028 AS TRUE, v029 AS TRUE, v030 AS TRUE, v031 AS TRUE, v032 AS TRUE, v033 AS TRUE, v034 AS TRUE, v035 AS TRUE, v036 AS TRUE, v037 AS TRUE, v038 AS TRUE, v039 AS TRUE, v040 AS TRUE, v041 AS TRUE, v042 AS TRUE, v043 AS TRUE, v044 AS TRUE, v045 AS TRUE, v046 AS TRUE, v047 AS TRUE, v048 AS TRUE, v049 AS TRUE, v050 AS TRUE, v051 AS TRUE, v052 AS TRUE, v053 AS TRUE, v054 AS TRUE, v055 AS TRUE, v056 AS TRUE, v057 AS TRUE, v058 AS TRUE, v059 AS TRUE, v060 AS TRUE, v061 AS TRUE, v062 AS TRUE, v063 AS TRUE, v064 AS TRUE, v065 AS TRUE, v066 AS TRUE, v067 AS TRUE, v068 AS TRUE, v069 AS TRUE, v070 AS TRUE, v071 AS TRUE, v072 AS TRUE, v073 AS TRUE, v074 AS TRUE, v075 AS TRUE, v076 AS TRUE, v077 AS TRUE, v078 AS TRUE, v079 AS TRUE, v080 AS TRUE, v081 AS TRUE, v082 AS TRUE, v083 AS TRUE, v084 AS TRUE, v085 AS TRUE, v086 AS TRUE, v087 AS TRUE, v088 AS TRUE, v089 AS TRUE, v090 AS TRUE, v091 AS TRUE, v092 AS TRUE, v093 AS TRUE, v094 AS TRUE, v095 AS TRUE, v096 AS TRUE, v097 AS TRUE, v098 AS TRUE, v099 AS TRUE, v100 AS TRUE, v101 AS TRUE, v102 AS TRUE, v103 AS TRUE, v104 AS TRUE, v105 AS TRUE, v106 AS TRUE, v107 AS TRUE, v108 AS TRUE, v109 AS TRUE, v110 AS TRUE, v111 AS TRUE, v112 AS TRUE, v113 AS TRUE, v114 AS TRUE, v115 AS TRUE, v116 AS TRUE, v117 AS TRUE, v118 AS TRUE, v119 AS TRUE, v120 AS TRUE, v121 AS TRUE, v122 AS TRUE, v123 AS TRUE, v124 AS TRUE, v125 AS TRUE, v126 AS TRUE, v127 AS TRUE, v128 AS TRUE, v129 AS TRUE, v130 AS TRUE, v131 AS TRUE, v132 AS TRUE, v133 AS TRUE, v134 AS TRUE, v135 AS TRUE, v136 AS TRUE, v137 AS TRUE, v138 AS TRUE, v139 AS TRUE, v140 AS TRUE, v141 AS TRUE, v142 AS TRUE, v143 AS TRUE, v144 AS TRUE, v145 AS TRUE, v146 AS TRUE, v147 AS TRUE, v148 AS TRUE, v149 AS TRUE, v150 AS TRUE, v151 AS TRUE, v152 AS TRUE, v153 AS TRUE, v154 AS TRUE, v155 AS TRUE, v156 AS TRUE, v157 AS TRUE, v158 AS TRUE, v159 AS TRUE, v160 AS TRUE, v161 AS TRUE, v162 AS TRUE, v163 AS TRUE, v164 AS TRUE, v165 AS TRUE, v166 AS TRUE, v167 AS TRUE, v168 AS TRUE, v169 AS TRUE, v170 AS TRUE, v171 AS TRUE, v172 AS TRUE, v173 AS TRUE, v174 AS TRUE, v175 AS TRUE, v176 AS TRUE, v177 AS TRUE, v178 AS TRUE, v179 AS TRUE, v180 AS TRUE, v181 AS TRUE, v182 AS TRUE, v183 AS TRUE, v184 AS TRUE, v185 AS TRUE, v186 AS TRUE, v187 AS TRUE, v188 AS TRUE, v189 AS TRUE, v190 AS TRUE, v191 AS TRUE, v192 AS TRUE, v193 AS TRUE, v194 AS TRUE, v195 AS TRUE, v196 AS TRUE, v197 AS TRUE, v198 AS TRUE, v199 AS TRUE, v200 AS TRUE, v201 AS TRUE, v202 AS TRUE, v203 AS TRUE, v204 AS TRUE, v205 AS TRUE, v206 AS TRUE, v207 AS TRUE, v208 AS TRUE, v209 AS TRUE, v210 AS TRUE, v211 AS TRUE, v212 AS TRUE, v213 AS TRUE, v214 AS TRUE, v215 AS TRUE, v216 AS TRUE, v217 AS TRUE, v218 AS TRUE, v219 AS TRUE, v220 AS TRUE, v221 AS TRUE, v222 AS TRUE, v223 AS TRUE, v224 AS TRUE, v225 AS TRUE, v226 AS TRUE, v227 AS TRUE, v228 AS TRUE, v229 AS TRUE, v230 AS TRUE, v231 AS TRUE, v232 AS TRUE, v233 AS TRUE, v234 AS TRUE, v235 AS TRUE, v236 AS TRUE, v237 AS TRUE, v238 AS TRUE, v239 AS TRUE, v240 AS TRUE, v241 AS TRUE, v242 AS TRUE, v243 AS TRUE, v244 AS TRUE, v245 AS TRUE, v246 AS TRUE, v247 AS TRUE, v248 AS TRUE, v249 AS TRUE, v250 AS TRUE, v251 AS TRUE, v252 AS TRUE, v253 AS TRUE)" -PL/pgSQL function inline_code_block line 17 at EXECUTE - CREATE TEMP TABLE stock_null (company TEXT, tdate DATE, price INTEGER); - INSERT INTO stock_null VALUES ('c1', '2023-07-01', 100); - INSERT INTO stock_null VALUES ('c1', '2023-07-02', NULL); -- NULL in middle - INSERT INTO stock_null VALUES ('c1', '2023-07-03', 200); - INSERT INTO stock_null VALUES ('c1', '2023-07-04', 150); - SELECT company, tdate, price, count(*) OVER w AS match_count - FROM stock_null - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - PATTERN (START UP DOWN) - DEFINE START AS TRUE, UP AS price > PREV(price), DOWN AS price < -PREV(price) - ); - company | tdate | price | match_count ----------+------------+-------+------------- - c1 | 07-01-2023 | 100 | 0 - c1 | 07-02-2023 | | 0 - c1 | 07-03-2023 | 200 | 0 - c1 | 07-04-2023 | 150 | 0 -(4 rows) - --- Overlapping match tests (requires multi-context for correct behavior) --- Using array flags: 'X' = ANY(flags) for multi-TRUE support --- Test 1: A B C D E | B C D | C D E F - three overlapping patterns --- Different end points: B C D (4), A B C D E (5), C D E F (6) -WITH test_overlap1 AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['D']), - (5, ARRAY['E']), - (6, ARRAY['F']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_overlap1 -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B C D E | B C D | C D E F) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags), - E AS 'E' = ANY(flags), - F AS 'F' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 5 - 2 | {B} | | - 3 | {C} | | - 4 | {D} | | - 5 | {E} | | - 6 | {F} | | -(6 rows) - -WITH test_overlap1 AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['D']), - (5, ARRAY['E']), - (6, ARRAY['F']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_overlap1 -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN (A B C D E | B C D | C D E F) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags), - E AS 'E' = ANY(flags), - F AS 'F' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 5 - 2 | {B} | 2 | 4 - 3 | {C} | 3 | 6 - 4 | {D} | | - 5 | {E} | | - 6 | {F} | | -(6 rows) - --- PAST LAST: only one match --- TO NEXT ROW with multi-context: three matches --- Row 1: A B C D E (1-5) --- Row 2: B C D (2-4) <- ends first! --- Row 3: C D E F (3-6) <- ends last! --- Test 1b: Longer pattern FAILS, shorter pattern should survive --- Pattern: A+ B C D E | B+ C --- A+ B C D E fails (no E found in sequence) --- B+ C matches at rows 2-3 --- Result: match 2-3 (B+ C) -WITH test_overlap1b AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['D']), - (5, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_overlap1b -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A+ B C D E | B+ C) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags), - E AS 'E' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | | - 2 | {B} | 2 | 3 - 3 | {C} | | - 4 | {D} | | - 5 | {X} | | -(5 rows) - --- Test 2: A B+ C | B+ D - long B sequence with different endings -WITH test_overlap2 AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['B']), - (4, ARRAY['B']), - (5, ARRAY['B']), - (6, ARRAY['C']), - (7, ARRAY['B']), - (8, ARRAY['B']), - (9, ARRAY['B']), - (10, ARRAY['D']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_overlap2 -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN (A B+ C | B+ D) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 6 - 2 | {B} | | - 3 | {B} | | - 4 | {B} | | - 5 | {B} | | - 6 | {C} | | - 7 | {B} | 7 | 10 - 8 | {B} | 8 | 10 - 9 | {B} | 9 | 10 - 10 | {D} | | -(10 rows) - --- Current result (correct): --- Row 1: A B+ C (1-6) --- Row 7-9: B+ D (7-10, 8-10, 9-10) --- Note: Row 2-6 cannot match B+ D because Row 6 is C, not D --- With absorption: 8-10 and 9-10 would be absorbed by 7-10 (earlier context covers later) --- Test 3: Greedy quantifier with late failure - A B C+ D | A B --- Pattern expects D after C+, but E comes instead ("betrayal") -WITH test_betrayal AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['C']), - (5, ARRAY['C']), - (6, ARRAY['E']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_betrayal -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B C+ D | A B) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 2 - 2 | {B} | | - 3 | {C} | | - 4 | {C} | | - 5 | {C} | | - 6 | {E} | | + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+ C) + DEFINE + A AS val IS NOT NULL, + B AS val IS NULL AND NEXT(val) IS NULL, + C AS val IS NULL AND NEXT(val) IS NOT NULL +); + id | val | cnt +----+-----+----- + 1 | 100 | 4 + 2 | | 0 + 3 | | 0 + 4 | | 0 + 5 | 200 | 0 + 6 | 300 | 0 (6 rows) --- A B C+ D fails at Row 6 (E instead of D) --- Question: Does it fallback to A B (1-2)? --- Test 4: Lexical Order test - A B C | A B C D E --- SQL standard: first matching alternative wins -WITH test_lexical AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['D']), - (5, ARRAY['E']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_lexical -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B C | A B C D E) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags), - E AS 'E' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 3 - 2 | {B} | | - 3 | {C} | | - 4 | {D} | | - 5 | {E} | | -(5 rows) - --- SQL standard Lexical Order: A B C (1-3) wins (first alternative) --- Test 4b: Reversed pattern order - A B C D E | A B C -WITH test_lexical2 AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['D']), - (5, ARRAY['E']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_lexical2 -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B C D E | A B C) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags), - E AS 'E' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 5 - 2 | {B} | | - 3 | {C} | | - 4 | {D} | | - 5 | {E} | | -(5 rows) - --- SQL standard Lexical Order: A B C D E (1-5) wins (first alternative) --- Test 5: Multiple TRUE in single row (overlapping pattern variables) --- Each row matches multiple DEFINE conditions simultaneously -WITH test_multi_true AS ( - SELECT * FROM (VALUES - (1, ARRAY['A','B']), -- A and B both TRUE - (2, ARRAY['B','C']), -- B and C both TRUE - (3, ARRAY['C','D']), -- C and D both TRUE - (4, ARRAY['D','E']), -- D and E both TRUE - (5, ARRAY['E','_']) -- E only - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_multi_true -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B C D E) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags), - E AS 'E' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A,B} | 1 | 5 - 2 | {B,C} | | - 3 | {C,D} | | - 4 | {D,E} | | - 5 | {E,_} | | -(5 rows) - --- Row 1: A=T, B=T -> matches A --- Row 2: B=T, C=T -> matches B --- Row 3: C=T, D=T -> matches C --- Row 4: D=T, E=T -> matches D --- Row 5: E=T -> matches E --- Result: match 1-5 (A B C D E) --- Test 6: Diagonal pattern with multi-TRUE (shifted overlap) -WITH test_diagonal AS ( - SELECT * FROM (VALUES - (1, ARRAY['A','_']), - (2, ARRAY['B','A']), - (3, ARRAY['C','B']), - (4, ARRAY['D','C']), - (5, ARRAY['_','D']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_diagonal -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN (A B C D) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A,_} | 1 | 4 - 2 | {B,A} | 2 | 5 - 3 | {C,B} | | - 4 | {D,C} | | - 5 | {_,D} | | -(5 rows) - --- Possible matches: --- Start Row 1: A(1) B(2) C(3) D(4) -> 1-4 --- Start Row 2: A(2) B(3) C(4) D(5) -> 2-5 (because Row 2 has A too!) --- =================================================================== --- Context Absorption Tests --- =================================================================== --- Test absorption 1: Basic A+ pattern - later contexts absorbed by earlier -WITH test_absorb_basic AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['A']), - (3, ARRAY['A']), - (4, ARRAY['A']), - (5, ARRAY['B']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_absorb_basic -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN (A+) - DEFINE A AS 'A' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 4 - 2 | {A} | 2 | 4 - 3 | {A} | 3 | 4 - 4 | {A} | 4 | 4 - 5 | {B} | | -(5 rows) - --- Pattern A+ is absorbable (unbounded first element, only one unbounded) --- 4 matches: (1-4, 2-4, 3-4, 4-4) --- Test absorption 2: A+ B pattern - absorption with fixed suffix -WITH test_absorb_suffix AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['A']), - (3, ARRAY['A']), - (4, ARRAY['B']), - (5, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_absorb_suffix -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN (A+ B) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 4 - 2 | {A} | 2 | 4 - 3 | {A} | 3 | 4 - 4 | {B} | | - 5 | {X} | | -(5 rows) - --- Pattern A+ B is absorbable (A+ unbounded first, B bounded suffix) --- All potential matches end at same row (row 4 with B) --- 3 matches: (1-4, 2-4, 3-4) --- Test absorption 3: Per-branch absorption with ALT (B+ C | B+ D) -WITH test_absorb_alt AS ( - SELECT * FROM (VALUES - (1, ARRAY['B']), - (2, ARRAY['B']), - (3, ARRAY['B']), - (4, ARRAY['D']), - (5, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_absorb_alt -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN (B+ C | B+ D) - DEFINE - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {B} | 1 | 4 - 2 | {B} | 2 | 4 - 3 | {B} | 3 | 4 - 4 | {D} | | - 5 | {X} | | -(5 rows) - --- Both branches B+ C and B+ D are absorbable (B+ unbounded first) --- B+ D branch matches: 3 matches (1-4, 2-4, 3-4) --- Test absorption 4: Non-absorbable pattern (A B+ - unbounded not first) -WITH test_no_absorb AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['B']), - (4, ARRAY['B']), - (5, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_no_absorb -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN (A B+) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 4 - 2 | {B} | | - 3 | {B} | | - 4 | {B} | | - 5 | {X} | | -(5 rows) - --- Pattern A B+ is NOT absorbable (A bounded first, B+ unbounded but not first) --- Only Row 1 can start match (only row with A), so only one match: 1-4 --- Test absorption 5: GROUP merge enables absorption -WITH test_absorb_group AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['A']), - (4, ARRAY['B']), - (5, ARRAY['A']), - (6, ARRAY['B']), - (7, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_absorb_group -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN ((A B) (A B)+) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 6 - 2 | {B} | | - 3 | {A} | 3 | 6 - 4 | {B} | | - 5 | {A} | | - 6 | {B} | | - 7 | {X} | | -(7 rows) - --- Pattern optimized: (A B) (A B)+ -> (A B){2,} --- 2 matches: 1-6 (3 reps), 3-6 (2 reps) --- Test absorption 6: Multiple unbounded - first element unbounded enables absorption -WITH test_multi_unbounded AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['A']), - (3, ARRAY['B']), - (4, ARRAY['B']), - (5, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_multi_unbounded -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN (A+ B+) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 4 - 2 | {A} | 2 | 4 - 3 | {B} | | - 4 | {B} | | - 5 | {X} | | -(5 rows) - --- 2 matches: 1-4, 2-4 (same endpoint 4) --- ============================================ --- Jacob's RPR Patterns (from jacob branch) --- ============================================ --- Test: A? (optional, greedy) -WITH jacob_optional AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_optional -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A?) - DEFINE A AS 'A' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 1 - 2 | {X} | | -(2 rows) - --- Expected: 1-1 (matches A) --- Test: A{2} (exact count) -WITH jacob_exact AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['A']), - (3, ARRAY['A']), - (4, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_exact -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A{2}) - DEFINE A AS 'A' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 2 - 2 | {A} | | - 3 | {A} | | - 4 | {X} | | -(4 rows) - --- Expected: 1-2 --- Test: A{1,3} (bounded range, greedy) -WITH jacob_bounded AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['A']), - (3, ARRAY['A']), - (4, ARRAY['A']), - (5, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_bounded -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A{1,3}) - DEFINE A AS 'A' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 3 - 2 | {A} | | - 3 | {A} | | - 4 | {A} | 4 | 4 - 5 | {X} | | -(5 rows) - --- Expected: 1-3 (greedy takes max), then 4-4 --- Test: A | B (simple alternation) -WITH jacob_simple_alt AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_simple_alt -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A | B) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 1 - 2 | {B} | 2 | 2 - 3 | {X} | | -(3 rows) - --- Expected: 1-1 (A), 2-2 (B) --- Test: A | B | C (three-way alternation) -WITH jacob_three_alt AS ( - SELECT * FROM (VALUES - (1, ARRAY['B']), - (2, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_three_alt -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A | B | C) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {B} | 1 | 1 - 2 | {X} | | -(2 rows) - --- Expected: 1-1 (B) --- Test: A B C (concatenation) -WITH jacob_concat AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_concat -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B C) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 3 - 2 | {B} | | - 3 | {C} | | - 4 | {X} | | -(4 rows) - --- Expected: 1-3 --- Test: A B? C (optional middle) -WITH jacob_optional_mid AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['C']), - (3, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_optional_mid -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B? C) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 2 - 2 | {C} | | - 3 | {X} | | -(3 rows) - --- Expected: 1-2 (A C, B skipped) --- Test: (A B){2} (nested group with quantifier) -WITH jacob_nested_group AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['A']), - (4, ARRAY['B']), - (5, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_nested_group -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN ((A B){2}) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 4 - 2 | {B} | | - 3 | {A} | | - 4 | {B} | | - 5 | {X} | | -(5 rows) - --- Expected: 1-4 (A B A B) --- Test: (A){3} (quantifier on grouped single element) -WITH jacob_group_quant AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['A']), - (3, ARRAY['A']), - (4, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_group_quant -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN ((A){3}) - DEFINE A AS 'A' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 3 - 2 | {A} | | - 3 | {A} | | - 4 | {X} | | -(4 rows) - --- Expected: 1-3 --- Test: A B C | A B C D E (lexical order - first alt wins) -WITH jacob_lex_first AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['D']), - (5, ARRAY['E']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_lex_first -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B C | A B C D E) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags), - E AS 'E' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 3 - 2 | {B} | | - 3 | {C} | | - 4 | {D} | | - 5 | {E} | | -(5 rows) - --- Expected: 1-3 (A B C wins by lexical order) --- Test: A B C D E | A B C (lexical order - longer first wins) -WITH jacob_lex_long AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['D']), - (5, ARRAY['E']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_lex_long -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B C D E | A B C) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags), - E AS 'E' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 5 - 2 | {B} | | - 3 | {C} | | - 4 | {D} | | - 5 | {E} | | -(5 rows) - --- Expected: 1-5 (A B C D E wins by lexical order) --- ============================================ --- Alternation with quantifiers (BUG cases from Jacob's tests) --- ============================================ --- Test: (A | B)+ C - alternation inside quantified group followed by C -WITH jacob_alt_quant AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['A']), - (4, ARRAY['C']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_alt_quant -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN ((A | B)+ C) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 4 - 2 | {B} | | - 3 | {A} | | - 4 | {C} | | -(4 rows) - --- Expected: 1-4 (A B A C) --- Test: ((A | B) C)+ - alternation inside group with outer quantifier -WITH jacob_alt_group AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['C']), - (3, ARRAY['B']), - (4, ARRAY['C']), - (5, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_alt_group -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (((A | B) C)+) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A} | 1 | 4 - 2 | {C} | | - 3 | {B} | | - 4 | {C} | | - 5 | {X} | | -(5 rows) - --- Expected: 1-4 (A C B C) --- ============================================ --- RELUCTANT quantifiers --- ============================================ --- Test: A+? B (reluctant) - B available at row 3, A exits early -WITH jacob_reluctant AS ( - SELECT * FROM (VALUES - (1, ARRAY['A','_']), - (2, ARRAY['A','_']), - (3, ARRAY['A','B']), - (4, ARRAY['B','_']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_reluctant -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A+? B) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A,_} | 1 | 3 - 2 | {A,_} | | - 3 | {A,B} | | - 4 | {B,_} | | -(4 rows) - --- Test: A{1,3}? B (reluctant bounded) - same data, bounded quantifier -WITH jacob_reluctant_bounded AS ( - SELECT * FROM (VALUES - (1, ARRAY['A','_']), - (2, ARRAY['A','_']), - (3, ARRAY['A','B']), - (4, ARRAY['B','_']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_reluctant_bounded -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A{1,3}? B) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags) -); - id | flags | match_start | match_end -----+-------+-------------+----------- - 1 | {A,_} | 1 | 3 - 2 | {A,_} | | - 3 | {A,B} | | - 4 | {B,_} | | -(4 rows) - --- ============================================ --- Nested quantifiers (pathological patterns) --- ============================================ --- These patterns previously caused segfault or infinite loop. --- Now they are either optimized at compile time or handled safely at runtime. --- Test: (A*)* - nested unbounded quantifiers (optimized to A*) -SELECT v, count(*) OVER w AS c -FROM (SELECT generate_series(1, 5) v) -WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - INITIAL - PATTERN ((A*)*) - DEFINE A AS TRUE -); - v | c ----+--- - 1 | 5 - 2 | 0 - 3 | 0 - 4 | 0 - 5 | 0 -(5 rows) - --- Test: (A*)+ - inner nullable, outer requires one (optimized to A*) -SELECT v, count(*) OVER w AS c -FROM (SELECT generate_series(1, 5) v) -WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - INITIAL - PATTERN ((A*)+) - DEFINE A AS TRUE -); - v | c ----+--- - 1 | 5 - 2 | 0 - 3 | 0 - 4 | 0 - 5 | 0 -(5 rows) - --- Test: (A+)* - outer nullable (optimized to A*) -SELECT v, count(*) OVER w AS c -FROM (SELECT generate_series(1, 5) v) -WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - INITIAL - PATTERN ((A+)*) - DEFINE A AS TRUE -); - v | c ----+--- - 1 | 5 - 2 | 0 - 3 | 0 - 4 | 0 - 5 | 0 -(5 rows) - --- Test: (A+)+ - both require match (optimized to A+) -SELECT v, count(*) OVER w AS c -FROM (SELECT generate_series(1, 5) v) -WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - INITIAL - PATTERN ((A+)+) - DEFINE A AS TRUE -); - v | c ----+--- - 1 | 5 - 2 | 0 - 3 | 0 - 4 | 0 - 5 | 0 -(5 rows) - --- Test: (A* B*)* - complex nested pattern (runtime protection) --- Not optimized but handled safely by empty-match loop prevention -SELECT v, count(*) OVER w AS c -FROM (SELECT generate_series(1, 5) v) -WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - INITIAL - PATTERN ((A* B*)*) - DEFINE A AS TRUE, B AS TRUE -); - v | c ----+--- - 1 | 5 - 2 | 0 - 3 | 0 - 4 | 0 - 5 | 0 -(5 rows) - --- Test: (((A)*)*)* - triple nested (optimized through recursive optimization) -SELECT v, count(*) OVER w AS c -FROM (SELECT generate_series(1, 3) v) -WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - INITIAL - PATTERN ((((A)*)*)*) - DEFINE A AS TRUE -); - v | c ----+--- - 1 | 3 - 2 | 0 - 3 | 0 -(3 rows) - +DROP TABLE rpr_consec_null; diff --git a/src/test/regress/expected/rpr_base.out b/src/test/regress/expected/rpr_base.out index ab878443379..3383b242ef0 100644 --- a/src/test/regress/expected/rpr_base.out +++ b/src/test/regress/expected/rpr_base.out @@ -1449,7 +1449,7 @@ ERROR: syntax error at or near "*" LINE 6: PATTERN (A+ *) ^ -- Expected: ERROR: syntax error at or near "*" --- ? ? (invalid combination) +-- ? ? (parsed as ?? reluctant quantifier) SELECT COUNT(*) OVER w FROM rpr_reluctant WINDOW w AS ( @@ -2203,7 +2203,32 @@ SELECT pg_get_viewdef('rpr_serial_v8'::regclass); (1 row) DROP VIEW rpr_serial_v8; -DROP TABLE rpr_serial; +-- Reluctant {1}? quantifier deparse through ruleutils +CREATE VIEW rpr_quant_reluctant_v AS +SELECT id, val, count(*) OVER w +FROM rpr_serial +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{1}? B) + DEFINE A AS val > 0, B AS val > 0); +SELECT pg_get_viewdef('rpr_quant_reluctant_v'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS count + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{1}? b) + + DEFINE + + a AS (val > 0), + + b AS (val > 0) ); +(1 row) + +DROP VIEW rpr_quant_reluctant_v; -- Materialized view (if supported) CREATE TABLE rpr_mview (id INT, val INT); INSERT INTO rpr_mview VALUES (1, 10), (2, 20), (3, 30); @@ -2251,6 +2276,52 @@ SELECT * FROM rpr_mview_v1 ORDER BY id; DROP MATERIALIZED VIEW rpr_mview_v1; DROP TABLE rpr_mview; +-- CREATE TABLE AS SELECT with RPR +CREATE TABLE rpr_ctas (id INT, val INT); +INSERT INTO rpr_ctas VALUES (1, 10), (2, 20), (3, 15), (4, 25); +CREATE TABLE rpr_ctas_result AS +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_ctas +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS val > PREV(val) +); +SELECT * FROM rpr_ctas_result ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 2 + 4 | 25 | 0 +(4 rows) + +-- INSERT INTO ... SELECT with RPR +CREATE TABLE rpr_insert_target (id INT, val INT, cnt BIGINT); +INSERT INTO rpr_insert_target +SELECT id, val, count(*) OVER w +FROM rpr_ctas +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS val > PREV(val) +); +SELECT * FROM rpr_insert_target ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 2 + 4 | 25 | 0 +(4 rows) + +DROP TABLE rpr_ctas_result; +DROP TABLE rpr_insert_target; +DROP TABLE rpr_ctas; -- Prepared statements (tests outfuncs.c / readfuncs.c) CREATE TABLE rpr_prep (id INT, val INT); INSERT INTO rpr_prep VALUES (1, 10), (2, 20), (3, 30); @@ -2608,6 +2679,57 @@ SELECT pg_get_viewdef('rpr_multiwin_v'::regclass); DROP VIEW rpr_multiwin_v; DROP TABLE rpr_multiwin; +-- {n} quantifier display in view +CREATE VIEW rpr_quant_n_v AS +SELECT id, val, count(*) OVER w +FROM rpr_serial +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{3}) + DEFINE A AS val > 0); +SELECT pg_get_viewdef('rpr_quant_n_v'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS count + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{3}) + + DEFINE + + a AS (val > 0) ); +(1 row) + +DROP VIEW rpr_quant_n_v; +-- {n,} quantifier display in view +CREATE VIEW rpr_quant_n_plus_v AS +SELECT id, val, count(*) OVER w +FROM rpr_serial +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,}) + DEFINE A AS val > 0); +SELECT pg_get_viewdef('rpr_quant_n_plus_v'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS count + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{2,}) + + DEFINE + + a AS (val > 0) ); +(1 row) + +DROP VIEW rpr_quant_n_plus_v; +DROP TABLE rpr_serial; -- ============================================================ -- Error Cases Tests -- ============================================================ @@ -2694,6 +2816,7 @@ WINDOW w AS ( ERROR: pattern variable qualified column reference "a.val" is not supported in DEFINE clause LINE 7: DEFINE A AS A.val > 0 ^ +-- Expected: ERROR: pattern variable qualified column reference "a.val" is not supported -- PATTERN-only variable qualified name: not supported even without DEFINE entry SELECT COUNT(*) OVER w FROM rpr_err @@ -2706,6 +2829,7 @@ WINDOW w AS ( ERROR: pattern variable qualified column reference "b.val" is not supported in DEFINE clause LINE 7: DEFINE A AS B.val > 0 ^ +-- Expected: ERROR: pattern variable qualified column reference "b.val" is not supported -- DEFINE-only variable qualified name: still a pattern variable, not a range variable SELECT COUNT(*) OVER w FROM rpr_err @@ -2718,6 +2842,7 @@ WINDOW w AS ( ERROR: pattern variable qualified column reference "b.val" is not supported in DEFINE clause LINE 7: DEFINE A AS val > 0, B AS B.val > 0 ^ +-- Expected: ERROR: pattern variable qualified column reference "b.val" is not supported -- FROM-clause range variable qualified name: not allowed (prohibited by §6.5) SELECT COUNT(*) OVER w FROM rpr_err @@ -2730,6 +2855,7 @@ WINDOW w AS ( ERROR: range variable qualified column reference "rpr_err.val" is not allowed in DEFINE clause LINE 7: DEFINE A AS rpr_err.val > 0 ^ +-- Expected: ERROR: range variable qualified column reference "rpr_err.val" is not allowed -- Semantic errors -- Undefined column in DEFINE SELECT COUNT(*) OVER w @@ -2769,7 +2895,7 @@ WINDOW w AS ( ERROR: aggregate functions are not allowed in DEFINE LINE 7: DEFINE A AS COUNT(*) > 0 ^ --- Expected: ERROR or works depending on implementation +-- Expected: ERROR: aggregate functions are not allowed in DEFINE -- Subquery in DEFINE (NOT SUPPORTED) SELECT COUNT(*) OVER w FROM rpr_err @@ -3019,6 +3145,26 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (6 rows) +-- Data execution: SEQ flatten produces correct results +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A ((B) (C))) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + -- ALT flatten: (A | (B | C))+ -> (a | b | c)+ EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan @@ -3049,6 +3195,26 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (6 rows) +-- Data execution: ALT dedup produces correct results +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | A)+) DEFINE A AS val <= 50, B AS val > 50); + id | val | cnt +----+-----+----- + 1 | 10 | 10 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + -- Quantifier multiply: (A{2}){3} -> a{6} EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan @@ -3468,6 +3634,26 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (6 rows) +-- Data execution: GROUP unwrap produces correct results +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B | C)) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + id | val | cnt +----+-----+----- + 1 | 10 | 1 + 2 | 20 | 1 + 3 | 30 | 1 + 4 | 40 | 1 + 5 | 50 | 1 + 6 | 60 | 1 + 7 | 70 | 1 + 8 | 80 | 1 + 9 | 90 | 1 + 10 | 100 | 1 +(10 rows) + -- Reluctant optimization bypass: VAR merge -- A+? A stays as a+? a (greedy A+ A merges to a{2,}) EXPLAIN (COSTS OFF) @@ -3612,6 +3798,134 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (6 rows) +-- Duplicate GROUP removal: ((A | B)+ | (A | B)+) -> (a | b)+ +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B)+ | (A | B)+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b)+ + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Consecutive VAR merge with zero-min: A* A+ -> a+ +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A* A+) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Consecutive VAR merge (4-element): A A{2} A+ A{3} -> a{7,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A A{2} A+ A{3}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{7,}" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- PREFIX+SUFFIX merge (5-way): A B A B (A B)+ A B A B -> (a b){5,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B A B (A B)+ A B A B) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){5,}" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Unwrap single-item ALT after dedup: (A | A)+ -> a+ +-- ALT dedup reduces to single-item, then GROUP unwrap +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | A)+) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- GROUP{1,1} to SEQ with flatten: ((A B)(C D)) -> a b c d +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B)(C D))) + DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, + C AS val > 50 AND val <= 75, D AS val > 75); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c d + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Nested ALT pattern: ((A B) | C) D | A B C +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B) | C) D | A B C) + DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, + C AS val > 50 AND val <= 75, D AS val > 75); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a b | c) d | a b c) + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Nested ALT with unbounded: ((A+ B) | C) D | A B C +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A+ B) | C) D | A B C) + DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, + C AS val > 50 AND val <= 75, D AS val > 75); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a+" b | c) d | a b c) + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + -- ============================================================ -- Absorption Flag Display Tests -- ============================================================ @@ -3801,6 +4115,28 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND 10 FOLLOWING -> Seq Scan on rpr_plan (6 rows) +-- Reluctant {1}? quantifier deparse +-- A{1}? is a reluctant {1,1} quantifier. The deparse code must +-- output "{1}" explicitly to disambiguate from a bare "?" quantifier +-- (which would mean {0,1}). +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM rpr_plan +WINDOW w AS ( + ORDER BY val + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{1}? B) + DEFINE A AS val > 0, B AS val > 0 +); + QUERY PLAN +-------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY val ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{1}? b + -> Sort + Sort Key: val + -> Seq Scan on rpr_plan +(6 rows) + -- ============================================================ -- Absorption Analysis Tests -- ============================================================ @@ -4529,7 +4865,8 @@ ORDER BY category; ERROR: syntax error at or near "GROUP" LINE 12: GROUP BY category ^ --- Expected: ERROR (GROUP BY with window RPR not supported) +-- Expected: ERROR: syntax error at or near "GROUP" +-- (GROUP BY after WINDOW clause is not valid SQL syntax) -- ============================================================ -- Subquery and CTE Tests -- Files: planner.c, prepjointree.c @@ -5007,7 +5344,8 @@ CREATE TABLE rpr_sort (id INT, category VARCHAR(10), val INT); INSERT INTO rpr_sort VALUES (1, 'A', 30), (2, 'B', 20), (3, 'A', 10), (4, 'B', 40), (5, 'A', 50), (6, 'B', 60); --- RPR with GROUP BY +-- RPR with GROUP BY (aggregate in DEFINE → ERROR before GROUP BY interaction) +-- Expected: ERROR: aggregate functions are not allowed in DEFINE SELECT category, COUNT(*) as group_cnt, MAX(val) as max_val, @@ -5024,7 +5362,8 @@ ORDER BY category; ERROR: aggregate functions are not allowed in DEFINE LINE 11: DEFINE A AS COUNT(*) > 0 ^ --- RPR with HAVING +-- RPR with HAVING (same aggregate-in-DEFINE error) +-- Expected: ERROR: aggregate functions are not allowed in DEFINE SELECT category, COUNT(*) as group_cnt, COUNT(*) OVER w as window_cnt @@ -5787,6 +6126,3 @@ FROM (SELECT id, val, (2 rows) DROP TABLE rpr_plan; --- ============================================================ --- End of rpr_base.sql --- ============================================================ diff --git a/src/test/regress/expected/rpr_explain.out b/src/test/regress/expected/rpr_explain.out index 340408cc454..817269021f4 100644 --- a/src/test/regress/expected/rpr_explain.out +++ b/src/test/regress/expected/rpr_explain.out @@ -6,8 +6,9 @@ -- This test suite validates EXPLAIN output for RPR queries, -- including NFA statistics shown in EXPLAIN ANALYZE: -- - NFA States: peak, total, merged --- - NFA Contexts: peak, total, absorbed, skipped +-- - NFA Contexts: peak, total, pruned -- - NFA: matched (len min/max/avg), mismatched (len min/max/avg) +-- - NFA: absorbed (len min/max/avg), skipped (len min/max/avg) -- - Pattern deparse formatting -- - Multiple output formats (text, JSON, XML) -- @@ -434,7 +435,7 @@ WINDOW w AS ( (9 rows) DROP VIEW rpr_v; --- Grouped pattern with quantifier - state merging +-- Grouped pattern with quantifier - state count with grouping CREATE TEMP VIEW rpr_v AS SELECT count(*) OVER w FROM generate_series(1, 60) AS s(v) @@ -637,7 +638,7 @@ WINDOW w AS ( (9 rows) DROP VIEW rpr_v; --- High state merging - alternation with plus quantifier +-- High state count - alternation with plus quantifier CREATE TEMP VIEW rpr_v AS SELECT count(*) OVER w FROM generate_series(1, 100) AS s(v) @@ -758,7 +759,7 @@ WINDOW w AS ( DROP VIEW rpr_v; -- ============================================================ --- Context Statistics Tests (peak, total, absorbed, skipped) +-- Context Statistics Tests (peak, total, pruned + absorbed/skipped) -- ============================================================ -- Context absorption with unbounded quantifier at start CREATE TEMP VIEW rpr_v AS @@ -1047,7 +1048,7 @@ WINDOW w AS ( (9 rows) DROP VIEW rpr_v; --- Mix of short and long matches +-- Uniform match length with mismatches from gap rows (v%20 = 11..15) CREATE TEMP VIEW rpr_v AS SELECT count(*) OVER w FROM generate_series(1, 100) AS s(v) @@ -1094,8 +1095,8 @@ DROP VIEW rpr_v; -- ============================================================ -- Mismatch Length Statistics Tests -- ============================================================ --- Pattern that causes mismatches with length > 1 --- Mismatch happens when partial match fails after processing multiple rows +-- Pattern with complete match every cycle: 0 mismatched +-- A(1,2,3) B(4,5) C(6) repeats perfectly; X rows are pruned, not mismatched CREATE TEMP VIEW rpr_v AS SELECT count(*) OVER w FROM ( @@ -1374,87 +1375,6 @@ WINDOW w AS ( ] (1 row) -DROP VIEW rpr_v; --- ============================================================ --- XML Format Tests --- ============================================================ --- XML format output -CREATE TEMP VIEW rpr_v AS -SELECT count(*) OVER w -FROM generate_series(1, 30) AS s(v) -WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B) - DEFINE A AS v % 2 = 1, B AS v % 2 = 0 -); -SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; - line ------------------- - PATTERN (a b) -(1 row) - -SELECT rpr_explain_filter(' -EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT XML) -SELECT count(*) OVER w -FROM generate_series(1, 30) AS s(v) -WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B) - DEFINE A AS v % 2 = 1, B AS v % 2 = 0 -)'); - rpr_explain_filter --------------------------------------------------------------------------------- - + - + - + - WindowAgg + - false + - false + - 30.00 + - 1 + - false + - w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)+ - a b + - Memory + - 0 + - 2 + - 31 + - 0 + - 2 + - 31 + - 0 + - 15 + - 0 + - 15 + - 0 + - 2 + - 2 + - 2.0 + - 1 + - 1 + - 1.0 + - + - + - Function Scan + - Outer + - false + - false + - generate_series + - s + - 30.00 + - 1 + - false + - + - + - + - + - + - + - -(1 row) - DROP VIEW rpr_v; -- JSON format with mismatch statistics -- Pattern A B C expects 1,2,3 but gets 1,2,4 twice causing mismatches @@ -1618,6 +1538,87 @@ WINDOW w AS ( ] (1 row) +DROP VIEW rpr_v; +-- ============================================================ +-- XML Format Tests +-- ============================================================ +-- XML format output +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------ + PATTERN (a b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT XML) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +)'); + rpr_explain_filter +-------------------------------------------------------------------------------- + + + + + + + WindowAgg + + false + + false + + 30.00 + + 1 + + false + + w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)+ + a b + + Memory + + 0 + + 2 + + 31 + + 0 + + 2 + + 31 + + 0 + + 15 + + 0 + + 15 + + 0 + + 2 + + 2 + + 2.0 + + 1 + + 1 + + 1.0 + + + + + + Function Scan + + Outer + + false + + false + + generate_series + + s + + 30.00 + + 1 + + false + + + + + + + + + + + + + + +(1 row) + DROP VIEW rpr_v; -- ============================================================ -- Multiple Partitions Tests diff --git a/src/test/regress/expected/rpr_nfa.out b/src/test/regress/expected/rpr_nfa.out index 03ee174d359..c8464e8fcf4 100644 --- a/src/test/regress/expected/rpr_nfa.out +++ b/src/test/regress/expected/rpr_nfa.out @@ -322,6 +322,161 @@ WINDOW w AS ( 5 | {_} | | (5 rows) +-- Absorption with fixed suffix: A+ B +WITH test_absorb_suffix AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_suffix +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | + 5 | {X} | | +(5 rows) + +-- Per-branch absorption with ALT: B+ C | B+ D +WITH test_absorb_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['D']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (B+ C | B+ D) + DEFINE + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | 1 | 4 + 2 | {B} | 2 | 4 + 3 | {B} | 3 | 4 + 4 | {D} | | + 5 | {X} | | +(5 rows) + +-- Non-absorbable: A B+ (unbounded not in first position) +WITH test_no_absorb AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_no_absorb +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {B} | | + 3 | {B} | | + 4 | {B} | | + 5 | {X} | | +(5 rows) + +-- GROUP merge enables absorption: (A B) (A B)+ optimized to (A B){2,} +WITH test_absorb_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B) (A B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {A} | 3 | 6 + 4 | {B} | | + 5 | {A} | | + 6 | {B} | | + 7 | {X} | | +(7 rows) + +-- Multiple unbounded: A+ B+ (first element unbounded enables absorption) +WITH test_multi_unbounded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_multi_unbounded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {B} | | + 4 | {B} | | + 5 | {X} | | +(5 rows) + -- ============================================================ -- Context Lifecycle -- ============================================================ @@ -674,7 +829,8 @@ WINDOW w AS ( (5 rows) -- Optional reluctant group: (A B)?? C --- nfa_advance_begin: reluctant prefers skip (0 iterations) over enter +-- nfa_advance_begin: reluctant tries skip first, but skip path needs C +-- at row 1 which is A → skip fails. Enter path succeeds: A(1) B(2) C(3). WITH test_optional_reluctant AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -734,6 +890,44 @@ WINDOW w AS ( 4 | {B} | | (4 rows) +-- Reluctant optional group skip-to-FIN +-- When a reluctant optional group's skip path reaches FIN, the group +-- entry path is abandoned (nodeWindowAgg.c nfa_advance_begin). +-- Pattern: C (A B)?? -- after C matches, the reluctant group (A B)?? +-- prefers to skip. Skip goes to FIN (group is last element), so +-- the match completes with just C. +WITH test_begin_skip_fin AS ( + SELECT * FROM (VALUES + (1, ARRAY['C']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['C','A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_begin_skip_fin +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (C (A B)??) + DEFINE + C AS 'C' = ANY(flags), + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {C} | 1 | 1 + 2 | {A} | | + 3 | {B} | | + 4 | {C,A} | 4 | 4 + 5 | {B} | | +(5 rows) + -- ============================================================ -- Match Phase -- ============================================================ @@ -1664,6 +1858,64 @@ WINDOW w AS ( 4 | {B} | | (4 rows) +-- A+? B (reluctant plus): exits A at first B availability +-- (Same scenario as greedy-vs-reluctant comparison above; retained for +-- standalone quantifier coverage alongside A{1,3}? and A{2,3}? below) +WITH test_reluctant_plus AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['A','_']), + (3, ARRAY['A','B']), + (4, ARRAY['B','_']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_reluctant_plus +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 3 + 2 | {A,_} | | + 3 | {A,B} | | + 4 | {B,_} | | +(4 rows) + +-- A{1,3}? B (reluctant bounded): same data, bounded quantifier +WITH test_reluctant_bounded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['A','_']), + (3, ARRAY['A','B']), + (4, ARRAY['B','_']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_reluctant_bounded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{1,3}? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 3 + 2 | {A,_} | | + 3 | {A,B} | | + 4 | {B,_} | | +(4 rows) + -- ============================================================ -- Pathological Pattern Runtime Protection -- ============================================================ @@ -2030,6 +2282,283 @@ WINDOW w AS ( 3 | {B} | 3 | 3 (3 rows) +-- Overlapping match: A B C D E | B C D | C D E F (SKIP PAST LAST ROW) +WITH test_overlap1 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E | B C D | C D E F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {E} | | + 6 | {F} | | +(6 rows) + +-- Same with SKIP TO NEXT ROW: three overlapping matches +WITH test_overlap1 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D E | B C D | C D E F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {B} | 2 | 4 + 3 | {C} | 3 | 6 + 4 | {D} | | + 5 | {E} | | + 6 | {F} | | +(6 rows) + +-- Longer pattern fails, shorter survives: A+ B C D E | B+ C +WITH test_overlap1b AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1b +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C D E | B+ C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {B} | 2 | 3 + 3 | {C} | | + 4 | {D} | | + 5 | {X} | | +(5 rows) + +-- Long B sequence with different endings: A B+ C | B+ D +WITH test_overlap2 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['B']), + (8, ARRAY['B']), + (9, ARRAY['B']), + (10, ARRAY['D']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap2 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+ C | B+ D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {B} | | + 4 | {B} | | + 5 | {B} | | + 6 | {C} | | + 7 | {B} | 7 | 10 + 8 | {B} | 8 | 10 + 9 | {B} | 9 | 10 + 10 | {D} | | +(10 rows) + +-- Greedy with late failure ("betrayal"): A B C+ D | A B +WITH test_betrayal AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['C']), + (5, ARRAY['C']), + (6, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_betrayal +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C+ D | A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {B} | | + 3 | {C} | | + 4 | {C} | | + 5 | {C} | | + 6 | {E} | | +(6 rows) + +-- Multiple TRUE per row: overlapping pattern variables +WITH test_multi_true AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['B','C']), + (3, ARRAY['C','D']), + (4, ARRAY['D','E']), + (5, ARRAY['E','_']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_multi_true +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 5 + 2 | {B,C} | | + 3 | {C,D} | | + 4 | {D,E} | | + 5 | {E,_} | | +(5 rows) + +-- Diagonal pattern with shifted multi-TRUE overlap +WITH test_diagonal AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['B','A']), + (3, ARRAY['C','B']), + (4, ARRAY['D','C']), + (5, ARRAY['_','D']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_diagonal +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 4 + 2 | {B,A} | 2 | 5 + 3 | {C,B} | | + 4 | {D,C} | | + 5 | {_,D} | | +(5 rows) + +-- ((A | B) C)+ - alternation inside group with outer quantifier +WITH test_alt_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), + (3, ARRAY['B']), + (4, ARRAY['C']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_alt_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A | B) C)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {C} | | + 3 | {B} | | + 4 | {C} | | + 5 | {X} | | +(5 rows) + -- ============================================================ -- Deep Nested Groups -- ============================================================ @@ -2227,6 +2756,69 @@ WINDOW w AS ( 5 | {C} | | (5 rows) +-- (A B){2} - group with exact quantifier +WITH test_group_exact AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_group_exact +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B){2}) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {B} | | + 3 | {A} | | + 4 | {B} | | + 5 | {X} | | +(5 rows) + +-- Nested END->END fast-forward +-- When an inner group has a nullable body and count < min, the +-- fast-forward path exits through the outer END, incrementing +-- the outer group's count (nodeWindowAgg.c nfa_advance_end). +-- Pattern: ((A?){2,3}){2,3} -- nested groups, neither collapses +-- because the optimizer cannot safely multiply non-exact quantifiers. +-- Data has no A rows, forcing all-empty iterations via fast-forward. +WITH test_nested_ff AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['B']), + (3, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_ff +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A?){2,3}){2,3}) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | | + 2 | {B} | | + 3 | {B} | | +(3 rows) + -- ============================================================ -- SKIP Options (Runtime) -- ============================================================ @@ -2390,6 +2982,8 @@ ORDER BY mode, id; -- ============================================================ -- INITIAL Mode (Runtime) +-- Placeholder: INITIAL is not yet implemented (syntax error). +-- Kept here so tests convert to runtime tests when implemented. -- ============================================================ -- INITIAL mode (not yet supported - produces syntax error) WITH test_initial_mode AS ( @@ -2597,6 +3191,80 @@ WINDOW w AS ( 5 | {B} | | (5 rows) +-- N FOLLOWING + SKIP TO NEXT ROW: overlapping matches bounded by frame +-- Row 1: frame [1,4], A(1-3) B(4) -> match +-- Row 2: frame [2,5], A(2-3) B(4) -> match +-- Row 3: frame [3,6], A(3) B(4) -> match +-- Row 5: frame [5,6], A(5) B(6) -> match +WITH test_n_skip_next AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_n_skip_next +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | + 5 | {A} | 5 | 6 + 6 | {B} | | +(6 rows) + +-- Frame exactly 1 row short of potential match +-- From row 1: A A A B needs 4 rows but frame holds 3 -> no match +-- From row 2: A A B fits in 3-row frame -> match +WITH test_frame_one_short AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_frame_one_short +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | + 5 | {A} | 5 | 6 + 6 | {B} | | +(6 rows) + -- ============================================================ -- Special Partition Cases -- ============================================================ @@ -3407,7 +4075,8 @@ WINDOW w AS ( -- (A?){0,3}: min=0, nullable inner. -- A never matches. A? matches empty, min=0 satisfied immediately. --- Expected: empty match (match_start = match_end) for every row +-- Per standard: empty match expected for every row. +-- XXX: visited bitmap blocks empty iteration → no match (same as {2,3}) WITH test_728_min0 AS ( SELECT * FROM (VALUES (1, ARRAY['B']), @@ -3436,7 +4105,8 @@ WINDOW w AS ( -- (A?){1,3}: min=1, nullable inner. -- A never matches. Need 1 empty iteration to satisfy min=1. --- Expected: empty match for every row +-- Per standard: empty match expected for every row. +-- XXX: visited bitmap blocks empty iteration → no match (same as {2,3}) WITH test_728_min1 AS ( SELECT * FROM (VALUES (1, ARRAY['B']), @@ -3497,7 +4167,8 @@ WINDOW w AS ( -- (A?){2,3} mixed: some rows match A, some don't -- Rows 1-2: A matches, greedy takes 2 → min satisfied -- Row 3: A doesn't match, needs 2 empty iterations for min=2 --- Expected: all rows produce matches +-- XXX: Row 3 fails due to visited bitmap (same as pure empty {2,3}) +-- Row 4: A matches 1 real iter + 1 ff empty exit → match 4-4 WITH test_728_min2_mixed AS ( SELECT * FROM (VALUES (1, ARRAY['A']), diff --git a/src/test/regress/sql/rpr.sql b/src/test/regress/sql/rpr.sql index a263074e3e9..8dbd3f9036a 100644 --- a/src/test/regress/sql/rpr.sql +++ b/src/test/regress/sql/rpr.sql @@ -1,5 +1,10 @@ -- --- Test for row pattern definition clause +-- Test for row pattern recognition: WINDOW clause integration and +-- real-world scenario tests using stock price data. +-- +-- Parser/planner tests: rpr_base.sql +-- NFA engine tests: rpr_nfa.sql +-- EXPLAIN statistics tests: rpr_explain.sql -- CREATE TEMP TABLE stock ( @@ -30,6 +35,10 @@ INSERT INTO stock VALUES ('company2', '2023-07-10', 1300); SELECT * FROM stock; +-- +-- Basic pattern matching with PREV/NEXT +-- + -- basic test using PREV SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, nth_value(tdate, 2) OVER w AS nth_second @@ -175,7 +184,7 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER HIGH AS price > 150 ); --- basic test with none-greedy pattern +-- basic test with fixed-length pattern (A A A = exactly 3) SELECT company, tdate, price, count(*) OVER w FROM stock WINDOW w AS ( @@ -294,7 +303,9 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER UPDOWN AS price > PREV(price) AND price > NEXT(price) ); --- using AFTER MATCH SKIP TO NEXT ROW +-- using AFTER MATCH SKIP TO NEXT ROW (same pattern as above; +-- match length is always 2, so result is identical to SKIP PAST LAST ROW. +-- SKIP TO NEXT ROW's distinct effect is tested in backtracking section.) SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w FROM stock WINDOW w AS ( @@ -308,8 +319,92 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER UPDOWN AS price > PREV(price) AND price > NEXT(price) ); --- match everything +-- PREV returns NULL at partition's first row (null_slot path) +SELECT company, tdate, price, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (BOUNDARY REST+) + DEFINE + BOUNDARY AS PREV(price) IS NULL, + REST AS PREV(price) IS NOT NULL +); + +-- NEXT returns NULL at partition's last row (null_slot path) +SELECT company, tdate, price, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ BOUNDARY) + DEFINE + A AS NEXT(price) IS NOT NULL, + BOUNDARY AS NEXT(price) IS NULL +); + +-- DESC order: PREV refers to the row with later date +SELECT company, tdate, price, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate DESC + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START DOWN+ UP+) + DEFINE + START AS TRUE, + DOWN AS price < PREV(price), + UP AS price > PREV(price) +); + +-- Multiple partitions with unequal sizes +WITH multi_part AS ( + SELECT * FROM (VALUES + ('a', 1, 10), ('a', 2, 20), ('a', 3, 15), + ('b', 1, 5), + ('c', 1, 100), ('c', 2, 200), ('c', 3, 150), ('c', 4, 140), ('c', 5, 300) + ) AS t(grp, id, val) +) +SELECT grp, id, val, count(*) OVER w +FROM multi_part +WINDOW w AS ( + PARTITION BY grp + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + A AS val <= NEXT(val), + B AS val > PREV(val) OR val < PREV(val) +); + +-- FLOAT/NUMERIC DEFINE conditions +WITH float_data AS ( + SELECT * FROM (VALUES + (1, 1.0::float8), (2, 1.5), (3, 1.4999), (4, 1.50001), (5, 0.1) + ) AS t(id, val) +) +SELECT id, val, count(*) OVER w +FROM float_data +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + A AS TRUE, + B AS val > PREV(val) * 0.99 +); + +-- +-- SKIP TO / Backtracking / Frame boundary +-- +-- match everything SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w FROM stock WINDOW w AS ( @@ -474,6 +569,25 @@ UP AS price > PREV(price), DOWN AS price < PREV(price) ); +-- row_number() within RPR reduced frame +SELECT company, tdate, price, row_number() OVER w, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- +-- SQL Integration: JOIN, CTE, LATERAL +-- + -- JOIN case CREATE TEMP TABLE t1 (i int, v1 int); CREATE TEMP TABLE t2 (j int, v2 int); @@ -552,6 +666,10 @@ SELECT id, i, j, count(*) OVER w COND AS PREV(i + j + 1) < 10 ); +-- +-- Large-scale / scalability tests +-- + -- Smoke test for larger partitions. WITH s AS ( SELECT v, count(*) OVER w AS c @@ -607,8 +725,9 @@ result AS ( SELECT match_first, match_last, match_len FROM result WHERE match_len > 0; -- --- Using IGNORE NULLS +-- IGNORE NULLS -- + -- no NULL rows case. The result should be identical with "basic test using PREV" SELECT company, tdate, price, first_value(price) IGNORE NULLS OVER w, last_value(price) IGNORE NULLS OVER w, @@ -626,7 +745,7 @@ SELECT company, tdate, price, first_value(price) IGNORE NULLS OVER w, ); -- nth_value with IGNORE NULLS option wants to find the second row but --- due a NULL in the midlle, it returns the third row. +-- due to a NULL in the middle, it returns the third row. WITH data AS ( SELECT * FROM (VALUES (10, 1), (11, NULL), (12, 3), (13, 4) @@ -643,8 +762,8 @@ WITH data AS ( ); -- nth_value with IGNORE NULLS option wants to find the third row but --- due a NULL in the midlle, it reaches the end of reduced frame and --- return NULL +-- due to a NULL in the middle, it reaches the end of reduced frame and +-- returns NULL WITH data AS ( SELECT * FROM (VALUES (10, 1), (11, NULL), (12, 3), (13, 4) @@ -677,1503 +796,115 @@ WINDOW w AS ( DOWN AS price < PREV(price) ); --- View and pg_get_viewdef tests. -CREATE TEMP VIEW v_window AS -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, - nth_value(tdate, 2) OVER w AS nth_second - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (START UP+ DOWN+) - DEFINE - START AS TRUE, - UP AS price > PREV(price), - DOWN AS price < PREV(price) -); - -SELECT * FROM v_window; -SELECT pg_get_viewdef('v_window'); - --- --- Pattern optimization tests --- VIEW shows original pattern, EXPLAIN shows optimized pattern --- - --- Test: duplicate alternatives removal (A | B | A)+ -> (A | B)+ -CREATE TEMP VIEW v_opt_dup AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A | B | A)+) - DEFINE - A AS price > 100, - B AS price <= 100 -); -SELECT pg_get_viewdef('v_opt_dup'); -- original: ((a | b | a)+) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_dup; -- optimized: ((a | b)+) - --- Test: duplicate group removal ((A | B)+ | (A | B)+) -> (A | B)+ -CREATE TEMP VIEW v_opt_dup_group AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A | B)+ | (A | B)+) - DEFINE - A AS price > 100, - B AS price <= 100 -); -SELECT pg_get_viewdef('v_opt_dup_group'); -- original: ((a | b)+ | (a | b)+) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_dup_group; -- optimized: ((a | b)+) - --- Test: consecutive vars merge (A A A) -> A{3} -CREATE TEMP VIEW v_opt_merge AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A A A) - DEFINE - A AS price >= 140 AND price <= 150 -); -SELECT pg_get_viewdef('v_opt_merge'); -- original: (a a a) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge; -- optimized: a{3} - --- Test: quantified vars merge (A A+ A) -> A{3,} -CREATE TEMP VIEW v_opt_merge_quant AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A A+ A) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_merge_quant'); -- original: (a a+ a) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_quant; -- optimized: a{3,} - --- Test: merge two unbounded (A+ A+) -> A{2,} -CREATE TEMP VIEW v_opt_merge_unbounded AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company +-- IGNORE NULLS + first_value where first value in reduced frame is NULL +WITH data AS ( + SELECT * FROM (VALUES + (1, NULL), (2, NULL), (3, 30), (4, 40) + ) AS t(id, val)) +SELECT id, val, + first_value(val) IGNORE NULLS OVER w AS fv_ignull, + count(*) OVER w +FROM data +WINDOW w AS ( + ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A+ A+) - DEFINE - A AS price > 100 + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS TRUE ); -SELECT pg_get_viewdef('v_opt_merge_unbounded'); -- original: (a+ a+) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_unbounded; -- optimized: a{2,} --- Test: merge with zero-min (A* A+) -> A+ -CREATE TEMP VIEW v_opt_merge_star AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company +-- IGNORE NULLS + all values NULL in reduced frame +WITH data AS ( + SELECT * FROM (VALUES + (1, NULL), (2, NULL), (3, NULL) + ) AS t(id, val)) +SELECT id, val, + first_value(val) IGNORE NULLS OVER w AS fv_ignull, + last_value(val) IGNORE NULLS OVER w AS lv_ignull, + count(*) OVER w +FROM data +WINDOW w AS ( + ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A* A+) - DEFINE - A AS price > 100 + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS TRUE ); -SELECT pg_get_viewdef('v_opt_merge_star'); -- original: (a* a+) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_star; -- optimized: a+ --- Test: complex merge (A A{2} A+ A{3}) -> A{7,} -CREATE TEMP VIEW v_opt_merge_complex AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A A{2} A+ A{3}) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_merge_complex'); -- original: (a a{2} a+ a{3}) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_complex; -- optimized: a{7,} +-- +-- last_value IGNORE NULLS with reduced frame containing all NULLs +-- Exercises ignorenulls_getfuncarginframe SEEK_TAIL out-of-frame path +-- when notnull_relpos >= num_reduced_frame. +-- +CREATE TEMP TABLE rpr_nullval (id INT, val INT); +INSERT INTO rpr_nullval VALUES (1, 10), (2, NULL), (3, NULL), (4, 20); --- Test: group merge ((A B) (A B)+) -> (A B){2,} -CREATE TEMP VIEW v_opt_merge_group AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A B) (A B)+) - DEFINE - A AS price > 100, - B AS price <= 100 +SELECT id, val, + last_value(val) IGNORE NULLS OVER w AS lv_ignull, + count(*) OVER w AS cnt +FROM rpr_nullval +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + A AS val IS NOT NULL, + B AS val IS NULL ); -SELECT pg_get_viewdef('v_opt_merge_group'); -- original: ((a b) (a b)+) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group; -- expected: (a b){2,} --- Test: group merge A B (A B)+ -> (A B){2,} -CREATE TEMP VIEW v_opt_merge_group2 AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A B (A B)+) - DEFINE - A AS price > 100, - B AS price <= 100 -); -SELECT pg_get_viewdef('v_opt_merge_group2'); -- original: (a b (a b)+) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group2; -- expected: (a b){2,} +-- +-- NULL handling +-- --- Test: group merge (A B) (A B)+ (A B) -> (A B){3,} -CREATE TEMP VIEW v_opt_merge_group3 AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A B) (A B)+ (A B)) - DEFINE - A AS price > 100, - B AS price <= 100 -); -SELECT pg_get_viewdef('v_opt_merge_group3'); -- original: ((a b) (a b)+ (a b)) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group3; -- expected: (a b){3,} +CREATE TEMP TABLE stock_null (company TEXT, tdate DATE, price INTEGER); +INSERT INTO stock_null VALUES ('c1', '2023-07-01', 100); +INSERT INTO stock_null VALUES ('c1', '2023-07-02', NULL); -- NULL in middle +INSERT INTO stock_null VALUES ('c1', '2023-07-03', 200); +INSERT INTO stock_null VALUES ('c1', '2023-07-04', 150); --- Test: group merge A B A B (A B)+ A B A B -> (A B){5,} -CREATE TEMP VIEW v_opt_merge_group4 AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A B A B (A B)+ A B A B) - DEFINE - A AS price > 100, - B AS price <= 100 +SELECT company, tdate, price, count(*) OVER w AS match_count +FROM stock_null +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (START UP DOWN) + DEFINE START AS TRUE, UP AS price > PREV(price), DOWN AS price < +PREV(price) ); -SELECT pg_get_viewdef('v_opt_merge_group4'); -- original: (a b a b (a b)+ a b a b) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group4; -- expected: (a b){5,} --- Test: group merge C A B (A B)+ A B C -> C (A B){3,} C -CREATE TEMP VIEW v_opt_merge_group5 AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (C A B (A B)+ A B C) - DEFINE - A AS price > 100, - B AS price <= 100, - C AS price > 200 -); -SELECT pg_get_viewdef('v_opt_merge_group5'); -- original: (c a b (a b)+ a b c) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group5; -- expected: c (a b){3,} c +-- Consecutive NULLs: PREV navigates through NULL values +CREATE TEMP TABLE rpr_consec_null (id INT, val INT); +INSERT INTO rpr_consec_null VALUES + (1, 100), (2, NULL), (3, NULL), (4, NULL), (5, 200), (6, 300); --- Test: consecutive GROUP merge (A B)+ (A B)+ -> (A B){2,} -CREATE TEMP VIEW v_opt_merge_consec_group AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company +-- PREV(val) IS NULL succeeds for both null_slot (first row) and actual NULL +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_consec_null +WINDOW w AS ( + ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A B)+ (A B)+) + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+ C) DEFINE - A AS price > 100, - B AS price <= 100 + A AS val IS NULL, + B AS val IS NULL AND PREV(val) IS NULL, + C AS val IS NOT NULL ); -SELECT pg_get_viewdef('v_opt_merge_consec_group'); -- original: ((a b)+ (a b)+) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_consec_group; -- expected: (a b){2,} --- Test: consecutive GROUP merge with different quantifiers (A B){2} (A B){3} -> (A B){5} -CREATE TEMP VIEW v_opt_merge_consec_group2 AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company +-- NEXT(val) through consecutive NULLs +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_consec_null +WINDOW w AS ( + ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A B){2} (A B){3}) + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+ C) DEFINE - A AS price > 100, - B AS price <= 100 + A AS val IS NOT NULL, + B AS val IS NULL AND NEXT(val) IS NULL, + C AS val IS NULL AND NEXT(val) IS NOT NULL ); -SELECT pg_get_viewdef('v_opt_merge_consec_group2'); -- original: ((a b){2} (a b){3}) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_consec_group2; -- expected: (a b){5} --- Test {n} quantifier display -CREATE TEMP VIEW v_quantifier_n AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A{3}) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_quantifier_n'); - --- Test {n,} quantifier display -CREATE TEMP VIEW v_quantifier_n_plus AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A{2,}) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_quantifier_n_plus'); - --- Test: flatten nested SEQ (A (B C)) -> A B C -CREATE TEMP VIEW v_opt_flatten_seq AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A (B C)) - DEFINE - A AS price > 100, - B AS price > 150, - C AS price < 150 -); -SELECT pg_get_viewdef('v_opt_flatten_seq'); -- original: (a (b c)) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_flatten_seq; -- optimized: a b c - --- Test: flatten nested ALT (A | (B | C)) -> (A | B | C) -CREATE TEMP VIEW v_opt_flatten_alt AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A | (B | C))+) - DEFINE - A AS price > 200, - B AS price > 100, - C AS price <= 100 -); -SELECT pg_get_viewdef('v_opt_flatten_alt'); -- original: ((a | (b | c))+) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_flatten_alt; -- optimized: ((a | b | c))+ - --- Test: unwrap GROUP{1,1} ((A)) -> A -CREATE TEMP VIEW v_opt_unwrap_group AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (((A))) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_unwrap_group'); -- original: (((a))) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_unwrap_group; -- optimized: a - --- Test: quantifier multiplication (A{2}){3} -> A{6} -CREATE TEMP VIEW v_opt_quant_mult AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A{2}){3}) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_quant_mult'); -- original: ((a{2}){3}) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult; -- optimized: a{6} - --- Test: quantifier multiplication (A{2,4}){3} -> A{6,12} -CREATE TEMP VIEW v_opt_quant_mult_range AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A{2,4}){3}) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_quant_mult_range'); -- original: ((a{2,4}){3}) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult_range; -- optimized: a{6,12} - --- Test: quantifier multiplication blocked (A{2}){3,5} -> no change --- outer range with child exact > 1 causes gaps (6,8,10 not 6,7,8,9,10) -CREATE TEMP VIEW v_opt_quant_mult_range2 AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A{2}){3,5}) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_quant_mult_range2'); -- original: ((a{2}){3,5}) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult_range2; -- NOT optimized: (a{2}){3,5} - --- Test: quantifier multiplication blocked by INF (A+){3} -> no change -CREATE TEMP VIEW v_opt_quant_mult_inf AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A+){3}) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_quant_mult_inf'); -- original: ((a+){3}) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult_inf; -- no multiply: (a+){3} - --- Test: unwrap single-item ALT after duplicate removal (A | A) -> A -CREATE TEMP VIEW v_opt_unwrap_alt AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN ((A | A)+) - DEFINE - A AS price > 100 -); -SELECT pg_get_viewdef('v_opt_unwrap_alt'); -- original: ((a | a)+) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_unwrap_alt; -- optimized: a+ - --- Test: GROUP{1,1} to SEQ with flatten ((A B)(C D)) -> A B C D -CREATE TEMP VIEW v_opt_group_to_seq AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (((A B)(C D))) - DEFINE - A AS price > 200, - B AS price > 150, - C AS price > 100, - D AS price <= 100 -); -SELECT pg_get_viewdef('v_opt_group_to_seq'); -- original: (((a b)(c d))) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_group_to_seq; -- optimized: a b c d - --- Test: combined consecutive GROUP + prefix merge A B (A B)+ (A B)+ -> (A B){3,} -CREATE TEMP VIEW v_opt_combined_merge AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A B (A B)+ (A B)+) - DEFINE - A AS price > 100, - B AS price <= 100 -); -SELECT pg_get_viewdef('v_opt_combined_merge'); -- original: (a b (a b)+ (a b)+) -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_combined_merge; -- expected: (a b){3,} - --- Test: nested ALT pattern - bug reproduction -CREATE TEMP VIEW v_opt_nested_alt AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (((A B) | C) D | A B C) - DEFINE - A AS price <= 100, - B AS price <= 150, - C AS price <= 200, - D AS price > 200 -); -SELECT pg_get_viewdef('v_opt_nested_alt'); -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_nested_alt; - --- Test: nested ALT with unbounded - A+ inside -CREATE TEMP VIEW v_opt_nested_alt2 AS -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (((A+ B) | C) D | A B C) - DEFINE - A AS price <= 100, - B AS price <= 150, - C AS price <= 200, - D AS price > 200 -); -SELECT pg_get_viewdef('v_opt_nested_alt2'); -EXPLAIN (COSTS OFF) SELECT * FROM v_opt_nested_alt2; - --- --- Error cases --- - --- row pattern definition variable name must not appear more than once -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (START UP+ DOWN+) - DEFINE - START AS TRUE, - UP AS price > PREV(price), - DOWN AS price < PREV(price), - UP AS price > PREV(price) -); - --- subqueries in DEFINE clause are not supported -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (START LOWPRICE) - DEFINE - START AS TRUE, - LOWPRICE AS price < (SELECT 100) -); - --- aggregates in DEFINE clause are not supported -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (START LOWPRICE) - DEFINE - START AS TRUE, - LOWPRICE AS price < count(*) -); - --- FRAME must start at current row when row pattern recognition is used -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (START UP+ DOWN+) - DEFINE - START AS TRUE, - UP AS price > PREV(price), - DOWN AS price < PREV(price) -); - --- EXCLUDE is not permitted -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - EXCLUDE CURRENT ROW - INITIAL - PATTERN (START UP+ DOWN+) - DEFINE - START AS TRUE, - UP AS price > PREV(price), - DOWN AS price < PREV(price) -); - --- SEEK is not supported -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - SEEK - PATTERN (START UP+ DOWN+) - DEFINE - START AS TRUE, - UP AS price > PREV(price), - DOWN AS price < PREV(price) -); - --- PREV's argument must have at least 1 column reference -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - INITIAL - PATTERN (START UP+ DOWN+) - DEFINE - START AS TRUE, - UP AS price > PREV(1), - DOWN AS price < PREV(1) -); - --- Unsupported quantifier -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - INITIAL - PATTERN (START UP~ DOWN+) - DEFINE - START AS TRUE, - UP AS price > PREV(1), - DOWN AS price < PREV(1) -); - --- PREV(1) missing column reference (error) -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - INITIAL - PATTERN (START UP+? DOWN+) - DEFINE - START AS TRUE, - UP AS price > PREV(1), - DOWN AS price < PREV(1) -); - --- Maximum pattern variables is 251 (RPR_VARID_MAX) - --- Error: 252 variables exceeds limit of 251 -DO $$ -DECLARE - pattern_vars text; - define_vars text; - query text; -BEGIN - SELECT string_agg('v' || lpad(i::text, 3, '0'), ' '), - string_agg('v' || lpad(i::text, 3, '0') || ' AS TRUE', ', ') - INTO pattern_vars, define_vars - FROM generate_series(1, 252) i; - - query := format('SELECT * FROM (SELECT 1 AS x) t WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - PATTERN (%s) - DEFINE %s)', pattern_vars, define_vars); - - EXECUTE query; -END; -$$; - --- Error: 253 variables exceeds limit of 251 -DO $$ -DECLARE - pattern_vars text; - define_vars text; - query text; -BEGIN - SELECT string_agg('v' || lpad(i::text, 3, '0'), ' '), - string_agg('v' || lpad(i::text, 3, '0') || ' AS TRUE', ', ') - INTO pattern_vars, define_vars - FROM generate_series(1, 253) i; - - query := format('SELECT * FROM (SELECT 1 AS x) t WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - PATTERN (%s) - DEFINE %s)', pattern_vars, define_vars); - - EXECUTE query; -END; -$$; - - CREATE TEMP TABLE stock_null (company TEXT, tdate DATE, price INTEGER); - INSERT INTO stock_null VALUES ('c1', '2023-07-01', 100); - INSERT INTO stock_null VALUES ('c1', '2023-07-02', NULL); -- NULL in middle - INSERT INTO stock_null VALUES ('c1', '2023-07-03', 200); - INSERT INTO stock_null VALUES ('c1', '2023-07-04', 150); - - SELECT company, tdate, price, count(*) OVER w AS match_count - FROM stock_null - WINDOW w AS ( - PARTITION BY company - ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - PATTERN (START UP DOWN) - DEFINE START AS TRUE, UP AS price > PREV(price), DOWN AS price < -PREV(price) - ); - - --- Overlapping match tests (requires multi-context for correct behavior) --- Using array flags: 'X' = ANY(flags) for multi-TRUE support - --- Test 1: A B C D E | B C D | C D E F - three overlapping patterns --- Different end points: B C D (4), A B C D E (5), C D E F (6) -WITH test_overlap1 AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['D']), - (5, ARRAY['E']), - (6, ARRAY['F']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_overlap1 -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B C D E | B C D | C D E F) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags), - E AS 'E' = ANY(flags), - F AS 'F' = ANY(flags) -); - -WITH test_overlap1 AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['D']), - (5, ARRAY['E']), - (6, ARRAY['F']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_overlap1 -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN (A B C D E | B C D | C D E F) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags), - E AS 'E' = ANY(flags), - F AS 'F' = ANY(flags) -); --- PAST LAST: only one match --- TO NEXT ROW with multi-context: three matches --- Row 1: A B C D E (1-5) --- Row 2: B C D (2-4) <- ends first! --- Row 3: C D E F (3-6) <- ends last! - --- Test 1b: Longer pattern FAILS, shorter pattern should survive --- Pattern: A+ B C D E | B+ C --- A+ B C D E fails (no E found in sequence) --- B+ C matches at rows 2-3 --- Result: match 2-3 (B+ C) -WITH test_overlap1b AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['D']), - (5, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_overlap1b -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A+ B C D E | B+ C) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags), - E AS 'E' = ANY(flags) -); - --- Test 2: A B+ C | B+ D - long B sequence with different endings -WITH test_overlap2 AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['B']), - (4, ARRAY['B']), - (5, ARRAY['B']), - (6, ARRAY['C']), - (7, ARRAY['B']), - (8, ARRAY['B']), - (9, ARRAY['B']), - (10, ARRAY['D']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_overlap2 -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN (A B+ C | B+ D) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags) -); --- Current result (correct): --- Row 1: A B+ C (1-6) --- Row 7-9: B+ D (7-10, 8-10, 9-10) --- Note: Row 2-6 cannot match B+ D because Row 6 is C, not D --- With absorption: 8-10 and 9-10 would be absorbed by 7-10 (earlier context covers later) - --- Test 3: Greedy quantifier with late failure - A B C+ D | A B --- Pattern expects D after C+, but E comes instead ("betrayal") -WITH test_betrayal AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['C']), - (5, ARRAY['C']), - (6, ARRAY['E']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_betrayal -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B C+ D | A B) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags) -); --- A B C+ D fails at Row 6 (E instead of D) --- Question: Does it fallback to A B (1-2)? - --- Test 4: Lexical Order test - A B C | A B C D E --- SQL standard: first matching alternative wins -WITH test_lexical AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['D']), - (5, ARRAY['E']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_lexical -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B C | A B C D E) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags), - E AS 'E' = ANY(flags) -); --- SQL standard Lexical Order: A B C (1-3) wins (first alternative) - --- Test 4b: Reversed pattern order - A B C D E | A B C -WITH test_lexical2 AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['D']), - (5, ARRAY['E']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_lexical2 -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B C D E | A B C) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags), - E AS 'E' = ANY(flags) -); --- SQL standard Lexical Order: A B C D E (1-5) wins (first alternative) - --- Test 5: Multiple TRUE in single row (overlapping pattern variables) --- Each row matches multiple DEFINE conditions simultaneously -WITH test_multi_true AS ( - SELECT * FROM (VALUES - (1, ARRAY['A','B']), -- A and B both TRUE - (2, ARRAY['B','C']), -- B and C both TRUE - (3, ARRAY['C','D']), -- C and D both TRUE - (4, ARRAY['D','E']), -- D and E both TRUE - (5, ARRAY['E','_']) -- E only - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_multi_true -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B C D E) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags), - E AS 'E' = ANY(flags) -); --- Row 1: A=T, B=T -> matches A --- Row 2: B=T, C=T -> matches B --- Row 3: C=T, D=T -> matches C --- Row 4: D=T, E=T -> matches D --- Row 5: E=T -> matches E --- Result: match 1-5 (A B C D E) - --- Test 6: Diagonal pattern with multi-TRUE (shifted overlap) -WITH test_diagonal AS ( - SELECT * FROM (VALUES - (1, ARRAY['A','_']), - (2, ARRAY['B','A']), - (3, ARRAY['C','B']), - (4, ARRAY['D','C']), - (5, ARRAY['_','D']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_diagonal -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN (A B C D) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags) -); --- Possible matches: --- Start Row 1: A(1) B(2) C(3) D(4) -> 1-4 --- Start Row 2: A(2) B(3) C(4) D(5) -> 2-5 (because Row 2 has A too!) - --- =================================================================== --- Context Absorption Tests --- =================================================================== - --- Test absorption 1: Basic A+ pattern - later contexts absorbed by earlier -WITH test_absorb_basic AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['A']), - (3, ARRAY['A']), - (4, ARRAY['A']), - (5, ARRAY['B']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_absorb_basic -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN (A+) - DEFINE A AS 'A' = ANY(flags) -); --- Pattern A+ is absorbable (unbounded first element, only one unbounded) --- 4 matches: (1-4, 2-4, 3-4, 4-4) - --- Test absorption 2: A+ B pattern - absorption with fixed suffix -WITH test_absorb_suffix AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['A']), - (3, ARRAY['A']), - (4, ARRAY['B']), - (5, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_absorb_suffix -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN (A+ B) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags) -); --- Pattern A+ B is absorbable (A+ unbounded first, B bounded suffix) --- All potential matches end at same row (row 4 with B) --- 3 matches: (1-4, 2-4, 3-4) - --- Test absorption 3: Per-branch absorption with ALT (B+ C | B+ D) -WITH test_absorb_alt AS ( - SELECT * FROM (VALUES - (1, ARRAY['B']), - (2, ARRAY['B']), - (3, ARRAY['B']), - (4, ARRAY['D']), - (5, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_absorb_alt -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN (B+ C | B+ D) - DEFINE - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags) -); --- Both branches B+ C and B+ D are absorbable (B+ unbounded first) --- B+ D branch matches: 3 matches (1-4, 2-4, 3-4) - --- Test absorption 4: Non-absorbable pattern (A B+ - unbounded not first) -WITH test_no_absorb AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['B']), - (4, ARRAY['B']), - (5, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_no_absorb -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN (A B+) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags) -); --- Pattern A B+ is NOT absorbable (A bounded first, B+ unbounded but not first) --- Only Row 1 can start match (only row with A), so only one match: 1-4 - --- Test absorption 5: GROUP merge enables absorption -WITH test_absorb_group AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['A']), - (4, ARRAY['B']), - (5, ARRAY['A']), - (6, ARRAY['B']), - (7, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_absorb_group -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN ((A B) (A B)+) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags) -); --- Pattern optimized: (A B) (A B)+ -> (A B){2,} --- 2 matches: 1-6 (3 reps), 3-6 (2 reps) - --- Test absorption 6: Multiple unbounded - first element unbounded enables absorption -WITH test_multi_unbounded AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['A']), - (3, ARRAY['B']), - (4, ARRAY['B']), - (5, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM test_multi_unbounded -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW - PATTERN (A+ B+) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags) -); --- 2 matches: 1-4, 2-4 (same endpoint 4) - --- ============================================ --- Jacob's RPR Patterns (from jacob branch) --- ============================================ - --- Test: A? (optional, greedy) -WITH jacob_optional AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_optional -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A?) - DEFINE A AS 'A' = ANY(flags) -); --- Expected: 1-1 (matches A) - --- Test: A{2} (exact count) -WITH jacob_exact AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['A']), - (3, ARRAY['A']), - (4, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_exact -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A{2}) - DEFINE A AS 'A' = ANY(flags) -); --- Expected: 1-2 - --- Test: A{1,3} (bounded range, greedy) -WITH jacob_bounded AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['A']), - (3, ARRAY['A']), - (4, ARRAY['A']), - (5, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_bounded -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A{1,3}) - DEFINE A AS 'A' = ANY(flags) -); --- Expected: 1-3 (greedy takes max), then 4-4 - --- Test: A | B (simple alternation) -WITH jacob_simple_alt AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_simple_alt -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A | B) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags) -); --- Expected: 1-1 (A), 2-2 (B) - --- Test: A | B | C (three-way alternation) -WITH jacob_three_alt AS ( - SELECT * FROM (VALUES - (1, ARRAY['B']), - (2, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_three_alt -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A | B | C) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags) -); --- Expected: 1-1 (B) - --- Test: A B C (concatenation) -WITH jacob_concat AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_concat -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B C) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags) -); --- Expected: 1-3 - --- Test: A B? C (optional middle) -WITH jacob_optional_mid AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['C']), - (3, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_optional_mid -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B? C) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags) -); --- Expected: 1-2 (A C, B skipped) - --- Test: (A B){2} (nested group with quantifier) -WITH jacob_nested_group AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['A']), - (4, ARRAY['B']), - (5, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_nested_group -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN ((A B){2}) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags) -); --- Expected: 1-4 (A B A B) - --- Test: (A){3} (quantifier on grouped single element) -WITH jacob_group_quant AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['A']), - (3, ARRAY['A']), - (4, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_group_quant -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN ((A){3}) - DEFINE A AS 'A' = ANY(flags) -); --- Expected: 1-3 - --- Test: A B C | A B C D E (lexical order - first alt wins) -WITH jacob_lex_first AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['D']), - (5, ARRAY['E']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_lex_first -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B C | A B C D E) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags), - E AS 'E' = ANY(flags) -); --- Expected: 1-3 (A B C wins by lexical order) - --- Test: A B C D E | A B C (lexical order - longer first wins) -WITH jacob_lex_long AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['C']), - (4, ARRAY['D']), - (5, ARRAY['E']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_lex_long -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B C D E | A B C) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags), - D AS 'D' = ANY(flags), - E AS 'E' = ANY(flags) -); --- Expected: 1-5 (A B C D E wins by lexical order) - --- ============================================ --- Alternation with quantifiers (BUG cases from Jacob's tests) --- ============================================ - --- Test: (A | B)+ C - alternation inside quantified group followed by C -WITH jacob_alt_quant AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['B']), - (3, ARRAY['A']), - (4, ARRAY['C']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_alt_quant -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN ((A | B)+ C) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags) -); --- Expected: 1-4 (A B A C) - --- Test: ((A | B) C)+ - alternation inside group with outer quantifier -WITH jacob_alt_group AS ( - SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['C']), - (3, ARRAY['B']), - (4, ARRAY['C']), - (5, ARRAY['X']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_alt_group -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (((A | B) C)+) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags), - C AS 'C' = ANY(flags) -); --- Expected: 1-4 (A C B C) - --- ============================================ --- RELUCTANT quantifiers --- ============================================ - --- Test: A+? B (reluctant) - B available at row 3, A exits early -WITH jacob_reluctant AS ( - SELECT * FROM (VALUES - (1, ARRAY['A','_']), - (2, ARRAY['A','_']), - (3, ARRAY['A','B']), - (4, ARRAY['B','_']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_reluctant -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A+? B) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags) -); - --- Test: A{1,3}? B (reluctant bounded) - same data, bounded quantifier -WITH jacob_reluctant_bounded AS ( - SELECT * FROM (VALUES - (1, ARRAY['A','_']), - (2, ARRAY['A','_']), - (3, ARRAY['A','B']), - (4, ARRAY['B','_']) - ) AS t(id, flags) -) -SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end -FROM jacob_reluctant_bounded -WINDOW w AS ( - ORDER BY id - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A{1,3}? B) - DEFINE - A AS 'A' = ANY(flags), - B AS 'B' = ANY(flags) -); - --- ============================================ --- Nested quantifiers (pathological patterns) --- ============================================ --- These patterns previously caused segfault or infinite loop. --- Now they are either optimized at compile time or handled safely at runtime. - --- Test: (A*)* - nested unbounded quantifiers (optimized to A*) -SELECT v, count(*) OVER w AS c -FROM (SELECT generate_series(1, 5) v) -WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - INITIAL - PATTERN ((A*)*) - DEFINE A AS TRUE -); - --- Test: (A*)+ - inner nullable, outer requires one (optimized to A*) -SELECT v, count(*) OVER w AS c -FROM (SELECT generate_series(1, 5) v) -WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - INITIAL - PATTERN ((A*)+) - DEFINE A AS TRUE -); - --- Test: (A+)* - outer nullable (optimized to A*) -SELECT v, count(*) OVER w AS c -FROM (SELECT generate_series(1, 5) v) -WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - INITIAL - PATTERN ((A+)*) - DEFINE A AS TRUE -); - --- Test: (A+)+ - both require match (optimized to A+) -SELECT v, count(*) OVER w AS c -FROM (SELECT generate_series(1, 5) v) -WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - INITIAL - PATTERN ((A+)+) - DEFINE A AS TRUE -); - --- Test: (A* B*)* - complex nested pattern (runtime protection) --- Not optimized but handled safely by empty-match loop prevention -SELECT v, count(*) OVER w AS c -FROM (SELECT generate_series(1, 5) v) -WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - INITIAL - PATTERN ((A* B*)*) - DEFINE A AS TRUE, B AS TRUE -); - --- Test: (((A)*)*)* - triple nested (optimized through recursive optimization) -SELECT v, count(*) OVER w AS c -FROM (SELECT generate_series(1, 3) v) -WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - INITIAL - PATTERN ((((A)*)*)*) - DEFINE A AS TRUE -); +DROP TABLE rpr_consec_null; diff --git a/src/test/regress/sql/rpr_base.sql b/src/test/regress/sql/rpr_base.sql index 97b62a73a0e..da28de1a8f9 100644 --- a/src/test/regress/sql/rpr_base.sql +++ b/src/test/regress/sql/rpr_base.sql @@ -1050,7 +1050,7 @@ WINDOW w AS ( ); -- Expected: ERROR: syntax error at or near "*" --- ? ? (invalid combination) +-- ? ? (parsed as ?? reluctant quantifier) SELECT COUNT(*) OVER w FROM rpr_reluctant WINDOW w AS ( @@ -1533,7 +1533,17 @@ SELECT pg_get_viewdef('rpr_serial_v8'::regclass); DROP VIEW rpr_serial_v8; -DROP TABLE rpr_serial; +-- Reluctant {1}? quantifier deparse through ruleutils +CREATE VIEW rpr_quant_reluctant_v AS +SELECT id, val, count(*) OVER w +FROM rpr_serial +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{1}? B) + DEFINE A AS val > 0, B AS val > 0); +SELECT pg_get_viewdef('rpr_quant_reluctant_v'::regclass); +DROP VIEW rpr_quant_reluctant_v; -- Materialized view (if supported) @@ -1560,6 +1570,40 @@ SELECT * FROM rpr_mview_v1 ORDER BY id; DROP MATERIALIZED VIEW rpr_mview_v1; DROP TABLE rpr_mview; +-- CREATE TABLE AS SELECT with RPR +CREATE TABLE rpr_ctas (id INT, val INT); +INSERT INTO rpr_ctas VALUES (1, 10), (2, 20), (3, 15), (4, 25); + +CREATE TABLE rpr_ctas_result AS +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_ctas +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS val > PREV(val) +); +SELECT * FROM rpr_ctas_result ORDER BY id; + +-- INSERT INTO ... SELECT with RPR +CREATE TABLE rpr_insert_target (id INT, val INT, cnt BIGINT); +INSERT INTO rpr_insert_target +SELECT id, val, count(*) OVER w +FROM rpr_ctas +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS val > PREV(val) +); +SELECT * FROM rpr_insert_target ORDER BY id; + +DROP TABLE rpr_ctas_result; +DROP TABLE rpr_insert_target; +DROP TABLE rpr_ctas; + -- Prepared statements (tests outfuncs.c / readfuncs.c) CREATE TABLE rpr_prep (id INT, val INT); @@ -1822,6 +1866,32 @@ SELECT pg_get_viewdef('rpr_multiwin_v'::regclass); DROP VIEW rpr_multiwin_v; DROP TABLE rpr_multiwin; +-- {n} quantifier display in view +CREATE VIEW rpr_quant_n_v AS +SELECT id, val, count(*) OVER w +FROM rpr_serial +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{3}) + DEFINE A AS val > 0); +SELECT pg_get_viewdef('rpr_quant_n_v'::regclass); +DROP VIEW rpr_quant_n_v; + +-- {n,} quantifier display in view +CREATE VIEW rpr_quant_n_plus_v AS +SELECT id, val, count(*) OVER w +FROM rpr_serial +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,}) + DEFINE A AS val > 0); +SELECT pg_get_viewdef('rpr_quant_n_plus_v'::regclass); +DROP VIEW rpr_quant_n_plus_v; + +DROP TABLE rpr_serial; + -- ============================================================ -- Error Cases Tests -- ============================================================ @@ -1901,6 +1971,7 @@ WINDOW w AS ( PATTERN (A+) DEFINE A AS A.val > 0 ); +-- Expected: ERROR: pattern variable qualified column reference "a.val" is not supported -- PATTERN-only variable qualified name: not supported even without DEFINE entry SELECT COUNT(*) OVER w @@ -1911,6 +1982,7 @@ WINDOW w AS ( PATTERN (A+ B+) DEFINE A AS B.val > 0 ); +-- Expected: ERROR: pattern variable qualified column reference "b.val" is not supported -- DEFINE-only variable qualified name: still a pattern variable, not a range variable SELECT COUNT(*) OVER w @@ -1921,6 +1993,7 @@ WINDOW w AS ( PATTERN (A+) DEFINE A AS val > 0, B AS B.val > 0 ); +-- Expected: ERROR: pattern variable qualified column reference "b.val" is not supported -- FROM-clause range variable qualified name: not allowed (prohibited by §6.5) SELECT COUNT(*) OVER w @@ -1931,6 +2004,7 @@ WINDOW w AS ( PATTERN (A+) DEFINE A AS rpr_err.val > 0 ); +-- Expected: ERROR: range variable qualified column reference "rpr_err.val" is not allowed -- Semantic errors @@ -1965,7 +2039,7 @@ WINDOW w AS ( PATTERN (A+) DEFINE A AS COUNT(*) > 0 ); --- Expected: ERROR or works depending on implementation +-- Expected: ERROR: aggregate functions are not allowed in DEFINE -- Subquery in DEFINE (NOT SUPPORTED) SELECT COUNT(*) OVER w @@ -2108,6 +2182,13 @@ SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN (A ((B) (C))) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); +-- Data execution: SEQ flatten produces correct results +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A ((B) (C))) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + -- ALT flatten: (A | (B | C))+ -> (a | b | c)+ EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan @@ -2120,6 +2201,13 @@ SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A | B | A)+) DEFINE A AS val <= 50, B AS val > 50); +-- Data execution: ALT dedup produces correct results +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | A)+) DEFINE A AS val <= 50, B AS val > 50); + -- Quantifier multiply: (A{2}){3} -> a{6} EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan @@ -2305,6 +2393,13 @@ SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A | B | C)) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); +-- Data execution: GROUP unwrap produces correct results +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B | C)) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + -- Reluctant optimization bypass: VAR merge -- A+? A stays as a+? a (greedy A+ A merges to a{2,}) EXPLAIN (COSTS OFF) @@ -2368,6 +2463,62 @@ SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING AFTER MATCH SKIP PAST LAST ROW PATTERN (A+?) DEFINE A AS val > 0); +-- Duplicate GROUP removal: ((A | B)+ | (A | B)+) -> (a | b)+ +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B)+ | (A | B)+) DEFINE A AS val <= 50, B AS val > 50); + +-- Consecutive VAR merge with zero-min: A* A+ -> a+ +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A* A+) DEFINE A AS val > 0); + +-- Consecutive VAR merge (4-element): A A{2} A+ A{3} -> a{7,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A A{2} A+ A{3}) DEFINE A AS val > 0); + +-- PREFIX+SUFFIX merge (5-way): A B A B (A B)+ A B A B -> (a b){5,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B A B (A B)+ A B A B) + DEFINE A AS val <= 50, B AS val > 50); + +-- Unwrap single-item ALT after dedup: (A | A)+ -> a+ +-- ALT dedup reduces to single-item, then GROUP unwrap +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | A)+) DEFINE A AS val > 0); + +-- GROUP{1,1} to SEQ with flatten: ((A B)(C D)) -> a b c d +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B)(C D))) + DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, + C AS val > 50 AND val <= 75, D AS val > 75); + +-- Nested ALT pattern: ((A B) | C) D | A B C +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B) | C) D | A B C) + DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, + C AS val > 50 AND val <= 75, D AS val > 75); + +-- Nested ALT with unbounded: ((A+ B) | C) D | A B C +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A+ B) | C) D | A B C) + DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, + C AS val > 50 AND val <= 75, D AS val > 75); + -- ============================================================ -- Absorption Flag Display Tests -- ============================================================ @@ -2450,6 +2601,19 @@ SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND 10 FOLLOWING AFTER MATCH SKIP PAST LAST ROW PATTERN (A+) DEFINE A AS val > 0); +-- Reluctant {1}? quantifier deparse +-- A{1}? is a reluctant {1,1} quantifier. The deparse code must +-- output "{1}" explicitly to disambiguate from a bare "?" quantifier +-- (which would mean {0,1}). +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM rpr_plan +WINDOW w AS ( + ORDER BY val + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{1}? B) + DEFINE A AS val > 0, B AS val > 0 +); + -- ============================================================ -- Absorption Analysis Tests -- ============================================================ @@ -2871,7 +3035,8 @@ WINDOW w AS ( ) GROUP BY category ORDER BY category; --- Expected: ERROR (GROUP BY with window RPR not supported) +-- Expected: ERROR: syntax error at or near "GROUP" +-- (GROUP BY after WINDOW clause is not valid SQL syntax) -- ============================================================ -- Subquery and CTE Tests @@ -3243,7 +3408,8 @@ INSERT INTO rpr_sort VALUES (1, 'A', 30), (2, 'B', 20), (3, 'A', 10), (4, 'B', 40), (5, 'A', 50), (6, 'B', 60); --- RPR with GROUP BY +-- RPR with GROUP BY (aggregate in DEFINE → ERROR before GROUP BY interaction) +-- Expected: ERROR: aggregate functions are not allowed in DEFINE SELECT category, COUNT(*) as group_cnt, @@ -3259,7 +3425,8 @@ WINDOW w AS ( ) ORDER BY category; --- RPR with HAVING +-- RPR with HAVING (same aggregate-in-DEFINE error) +-- Expected: ERROR: aggregate functions are not allowed in DEFINE SELECT category, COUNT(*) as group_cnt, @@ -3771,7 +3938,3 @@ FROM (SELECT id, val, ) s; DROP TABLE rpr_plan; - --- ============================================================ --- End of rpr_base.sql --- ============================================================ diff --git a/src/test/regress/sql/rpr_explain.sql b/src/test/regress/sql/rpr_explain.sql index d017a2292d2..4bb49650bb7 100644 --- a/src/test/regress/sql/rpr_explain.sql +++ b/src/test/regress/sql/rpr_explain.sql @@ -6,8 +6,9 @@ -- This test suite validates EXPLAIN output for RPR queries, -- including NFA statistics shown in EXPLAIN ANALYZE: -- - NFA States: peak, total, merged --- - NFA Contexts: peak, total, absorbed, skipped +-- - NFA Contexts: peak, total, pruned -- - NFA: matched (len min/max/avg), mismatched (len min/max/avg) +-- - NFA: absorbed (len min/max/avg), skipped (len min/max/avg) -- - Pattern deparse formatting -- - Multiple output formats (text, JSON, XML) -- @@ -310,7 +311,7 @@ WINDOW w AS ( );'); DROP VIEW rpr_v; --- Grouped pattern with quantifier - state merging +-- Grouped pattern with quantifier - state count with grouping CREATE TEMP VIEW rpr_v AS SELECT count(*) OVER w FROM generate_series(1, 60) AS s(v) @@ -428,7 +429,7 @@ WINDOW w AS ( );'); DROP VIEW rpr_v; --- High state merging - alternation with plus quantifier +-- High state count - alternation with plus quantifier CREATE TEMP VIEW rpr_v AS SELECT count(*) OVER w FROM generate_series(1, 100) AS s(v) @@ -499,7 +500,7 @@ WINDOW w AS ( DROP VIEW rpr_v; -- ============================================================ --- Context Statistics Tests (peak, total, absorbed, skipped) +-- Context Statistics Tests (peak, total, pruned + absorbed/skipped) -- ============================================================ -- Context absorption with unbounded quantifier at start @@ -671,7 +672,7 @@ WINDOW w AS ( );'); DROP VIEW rpr_v; --- Mix of short and long matches +-- Uniform match length with mismatches from gap rows (v%20 = 11..15) CREATE TEMP VIEW rpr_v AS SELECT count(*) OVER w FROM generate_series(1, 100) AS s(v) @@ -702,8 +703,8 @@ DROP VIEW rpr_v; -- Mismatch Length Statistics Tests -- ============================================================ --- Pattern that causes mismatches with length > 1 --- Mismatch happens when partial match fails after processing multiple rows +-- Pattern with complete match every cycle: 0 mismatched +-- A(1,2,3) B(4,5) C(6) repeats perfectly; X rows are pruned, not mismatched CREATE TEMP VIEW rpr_v AS SELECT count(*) OVER w FROM ( @@ -837,33 +838,6 @@ WINDOW w AS ( )'); DROP VIEW rpr_v; --- ============================================================ --- XML Format Tests --- ============================================================ - --- XML format output -CREATE TEMP VIEW rpr_v AS -SELECT count(*) OVER w -FROM generate_series(1, 30) AS s(v) -WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B) - DEFINE A AS v % 2 = 1, B AS v % 2 = 0 -); -SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; -SELECT rpr_explain_filter(' -EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT XML) -SELECT count(*) OVER w -FROM generate_series(1, 30) AS s(v) -WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - PATTERN (A B) - DEFINE A AS v % 2 = 1, B AS v % 2 = 0 -)'); -DROP VIEW rpr_v; - -- JSON format with mismatch statistics -- Pattern A B C expects 1,2,3 but gets 1,2,4 twice causing mismatches CREATE TEMP VIEW rpr_v AS @@ -912,6 +886,33 @@ WINDOW w AS ( )'); DROP VIEW rpr_v; +-- ============================================================ +-- XML Format Tests +-- ============================================================ + +-- XML format output +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT XML) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +)'); +DROP VIEW rpr_v; + -- ============================================================ -- Multiple Partitions Tests -- ============================================================ diff --git a/src/test/regress/sql/rpr_nfa.sql b/src/test/regress/sql/rpr_nfa.sql index a3f01b60bc4..c0f4dfa1248 100644 --- a/src/test/regress/sql/rpr_nfa.sql +++ b/src/test/regress/sql/rpr_nfa.sql @@ -255,6 +255,119 @@ WINDOW w AS ( A AS 'A' = ANY(flags) ); +-- Absorption with fixed suffix: A+ B +WITH test_absorb_suffix AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_suffix +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Per-branch absorption with ALT: B+ C | B+ D +WITH test_absorb_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['D']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (B+ C | B+ D) + DEFINE + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- Non-absorbable: A B+ (unbounded not in first position) +WITH test_no_absorb AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_no_absorb +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- GROUP merge enables absorption: (A B) (A B)+ optimized to (A B){2,} +WITH test_absorb_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B) (A B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Multiple unbounded: A+ B+ (first element unbounded enables absorption) +WITH test_multi_unbounded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_multi_unbounded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + -- ============================================================ -- Context Lifecycle -- ============================================================ @@ -524,7 +637,8 @@ WINDOW w AS ( ); -- Optional reluctant group: (A B)?? C --- nfa_advance_begin: reluctant prefers skip (0 iterations) over enter +-- nfa_advance_begin: reluctant tries skip first, but skip path needs C +-- at row 1 which is A → skip fails. Enter path succeeds: A(1) B(2) C(3). WITH test_optional_reluctant AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -571,6 +685,36 @@ WINDOW w AS ( B AS 'B' = ANY(flags) ); +-- Reluctant optional group skip-to-FIN +-- When a reluctant optional group's skip path reaches FIN, the group +-- entry path is abandoned (nodeWindowAgg.c nfa_advance_begin). +-- Pattern: C (A B)?? -- after C matches, the reluctant group (A B)?? +-- prefers to skip. Skip goes to FIN (group is last element), so +-- the match completes with just C. +WITH test_begin_skip_fin AS ( + SELECT * FROM (VALUES + (1, ARRAY['C']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['C','A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_begin_skip_fin +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (C (A B)??) + DEFINE + C AS 'C' = ANY(flags), + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + -- ============================================================ -- Match Phase -- ============================================================ @@ -1198,6 +1342,50 @@ WINDOW w AS ( B AS 'B' = ANY(flags) ); +-- A+? B (reluctant plus): exits A at first B availability +-- (Same scenario as greedy-vs-reluctant comparison above; retained for +-- standalone quantifier coverage alongside A{1,3}? and A{2,3}? below) +WITH test_reluctant_plus AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['A','_']), + (3, ARRAY['A','B']), + (4, ARRAY['B','_']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_reluctant_plus +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- A{1,3}? B (reluctant bounded): same data, bounded quantifier +WITH test_reluctant_bounded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['A','_']), + (3, ARRAY['A','B']), + (4, ARRAY['B','_']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_reluctant_bounded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{1,3}? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + -- ============================================================ -- Pathological Pattern Runtime Protection -- ============================================================ @@ -1481,6 +1669,211 @@ WINDOW w AS ( B AS 'B' = ANY(flags) ); +-- Overlapping match: A B C D E | B C D | C D E F (SKIP PAST LAST ROW) +WITH test_overlap1 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E | B C D | C D E F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + +-- Same with SKIP TO NEXT ROW: three overlapping matches +WITH test_overlap1 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D E | B C D | C D E F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + +-- Longer pattern fails, shorter survives: A+ B C D E | B+ C +WITH test_overlap1b AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1b +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C D E | B+ C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + +-- Long B sequence with different endings: A B+ C | B+ D +WITH test_overlap2 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['B']), + (8, ARRAY['B']), + (9, ARRAY['B']), + (10, ARRAY['D']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap2 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+ C | B+ D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- Greedy with late failure ("betrayal"): A B C+ D | A B +WITH test_betrayal AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['C']), + (5, ARRAY['C']), + (6, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_betrayal +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C+ D | A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- Multiple TRUE per row: overlapping pattern variables +WITH test_multi_true AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['B','C']), + (3, ARRAY['C','D']), + (4, ARRAY['D','E']), + (5, ARRAY['E','_']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_multi_true +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + +-- Diagonal pattern with shifted multi-TRUE overlap +WITH test_diagonal AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['B','A']), + (3, ARRAY['C','B']), + (4, ARRAY['D','C']), + (5, ARRAY['_','D']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_diagonal +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- ((A | B) C)+ - alternation inside group with outer quantifier +WITH test_alt_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), + (3, ARRAY['B']), + (4, ARRAY['C']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_alt_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A | B) C)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + -- ============================================================ -- Deep Nested Groups -- ============================================================ @@ -1625,6 +2018,55 @@ WINDOW w AS ( C AS 'C' = ANY(flags) ); +-- (A B){2} - group with exact quantifier +WITH test_group_exact AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_group_exact +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B){2}) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Nested END->END fast-forward +-- When an inner group has a nullable body and count < min, the +-- fast-forward path exits through the outer END, incrementing +-- the outer group's count (nodeWindowAgg.c nfa_advance_end). +-- Pattern: ((A?){2,3}){2,3} -- nested groups, neither collapses +-- because the optimizer cannot safely multiply non-exact quantifiers. +-- Data has no A rows, forcing all-empty iterations via fast-forward. +WITH test_nested_ff AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['B']), + (3, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_ff +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A?){2,3}){2,3}) + DEFINE + A AS 'A' = ANY(flags) +); + -- ============================================================ -- SKIP Options (Runtime) -- ============================================================ @@ -1751,6 +2193,8 @@ ORDER BY mode, id; -- ============================================================ -- INITIAL Mode (Runtime) +-- Placeholder: INITIAL is not yet implemented (syntax error). +-- Kept here so tests convert to runtime tests when implemented. -- ============================================================ -- INITIAL mode (not yet supported - produces syntax error) @@ -1917,6 +2361,62 @@ WINDOW w AS ( B AS 'B' = ANY(flags) ); +-- N FOLLOWING + SKIP TO NEXT ROW: overlapping matches bounded by frame +-- Row 1: frame [1,4], A(1-3) B(4) -> match +-- Row 2: frame [2,5], A(2-3) B(4) -> match +-- Row 3: frame [3,6], A(3) B(4) -> match +-- Row 5: frame [5,6], A(5) B(6) -> match +WITH test_n_skip_next AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_n_skip_next +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Frame exactly 1 row short of potential match +-- From row 1: A A A B needs 4 rows but frame holds 3 -> no match +-- From row 2: A A B fits in 3-row frame -> match +WITH test_frame_one_short AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_frame_one_short +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + -- ============================================================ -- Special Partition Cases -- ============================================================ @@ -2567,7 +3067,8 @@ WINDOW w AS ( -- (A?){0,3}: min=0, nullable inner. -- A never matches. A? matches empty, min=0 satisfied immediately. --- Expected: empty match (match_start = match_end) for every row +-- Per standard: empty match expected for every row. +-- XXX: visited bitmap blocks empty iteration → no match (same as {2,3}) WITH test_728_min0 AS ( SELECT * FROM (VALUES (1, ARRAY['B']), @@ -2590,7 +3091,8 @@ WINDOW w AS ( -- (A?){1,3}: min=1, nullable inner. -- A never matches. Need 1 empty iteration to satisfy min=1. --- Expected: empty match for every row +-- Per standard: empty match expected for every row. +-- XXX: visited bitmap blocks empty iteration → no match (same as {2,3}) WITH test_728_min1 AS ( SELECT * FROM (VALUES (1, ARRAY['B']), @@ -2639,7 +3141,8 @@ WINDOW w AS ( -- (A?){2,3} mixed: some rows match A, some don't -- Rows 1-2: A matches, greedy takes 2 → min satisfied -- Row 3: A doesn't match, needs 2 empty iterations for min=2 --- Expected: all rows produce matches +-- XXX: Row 3 fails due to visited bitmap (same as pure empty {2,3}) +-- Row 4: A matches 1 real iter + 1 ff empty exit → match 4-4 WITH test_728_min2_mixed AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -2773,4 +3276,3 @@ WINDOW w AS ( A AS price > 100, B AS TRUE ); - -- 2.50.1 (Apple Git-155)