From 18aaeff3851eddd785341d6d7d04ccc2a2e8069f Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Thu, 11 Jun 2026 09:19:20 +0900 Subject: [PATCH 76/77] Clean up RPR regression test comments and add group-merge tests The regression test comments referenced internal implementation details -- static helper functions (mergeConsecutiveVars/Groups/Alts, mergeGroupPrefixSuffix, rprPatternChildrenEqual, tryUnwrapGroup, nfa_advance_begin/end/var/alt, nfa_route_to_elem, nfa_state_clone), the execRPR.c file name, the isAbsorbable/hasAbsorbableState flags, and a verbatim source conditional -- which drift when those helpers are renamed or refactored. Replace each with a comment describing the pattern transformation or NFA behavior exercised. Also add two EXPLAIN tests for prefix/suffix group merging: a 5-way PREFIX+SUFFIX merge (B A B (A B)+ A B A B -> b (a b){4,}) and a negative case where a differing prefix prevents the merge. --- src/test/regress/expected/rpr_base.out | 56 ++++++++++++++++------- src/test/regress/expected/rpr_explain.out | 4 +- src/test/regress/expected/rpr_nfa.out | 39 ++++++++-------- src/test/regress/sql/rpr_base.sql | 36 ++++++++------- src/test/regress/sql/rpr_explain.sql | 4 +- src/test/regress/sql/rpr_nfa.sql | 39 ++++++++-------- 6 files changed, 100 insertions(+), 78 deletions(-) diff --git a/src/test/regress/expected/rpr_base.out b/src/test/regress/expected/rpr_base.out index 302caea1e5d..f73758ee22d 100644 --- a/src/test/regress/expected/rpr_base.out +++ b/src/test/regress/expected/rpr_base.out @@ -3739,7 +3739,6 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING (7 rows) -- Consecutive VAR merge: A A+ -> a{2,} --- Exercises the child->max == RPR_QUANTITY_INF branch in mergeConsecutiveVars, -- where a finite prev (A{1,1}) meets an infinite child (A+). EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan @@ -3808,8 +3807,7 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING (7 rows) -- Consecutive GROUP merge: (A B){2} (A B)+ -> (a b){3,} --- Exercises the child->max == RPR_QUANTITY_INF branch in mergeConsecutiveGroups, --- where a finite prev ((A B){2,2}) meets an infinite child ((A B)+). +-- Where a finite prev ((A B){2,2}) meets an infinite child ((A B)+). EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -4366,7 +4364,6 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING (7 rows) -- Consecutive GROUP merge with unbounded: (A+) (A+) -> a{2,} --- Tests mergeConsecutiveGroups with child->max == INF EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -4383,7 +4380,6 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING (7 rows) -- Consecutive GROUP merge finite: (A{10}){20} -> a{200} --- Tests mergeConsecutiveGroups with both finite EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -4400,7 +4396,6 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING (7 rows) -- Different GROUP prevents merge: (A B){2} (C D){3} --- Tests mergeConsecutiveGroups flush previous EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -4419,7 +4414,6 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING (7 rows) -- Different children count prevents merge: (A B)+ (A B C)+ --- Tests rprPatternChildrenEqual length check EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -4437,7 +4431,6 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING (7 rows) -- PREFIX only merge: A B (A B)+ -> (a b){2,} --- Tests mergeGroupPrefixSuffix: absorb preceding elements into GROUP min EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -4454,7 +4447,6 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING (7 rows) -- SUFFIX only merge: (A B)+ A B -> (a b){2,} --- Tests mergeGroupPrefixSuffix: absorb following elements into GROUP min EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -4471,7 +4463,6 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING (7 rows) -- Multiple SUFFIX absorption with skipUntil: (A B)+ A B A B C --- Tests mergeGroupPrefixSuffix: skip absorbed suffix elements EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -4488,8 +4479,7 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (7 rows) --- PREFIX merge with remaining prefix: A B C D (C D)+ --- Tests mergeGroupPrefixSuffix: trimmed list reconstruction +-- PREFIX merge with remaining prefix: A B C D (C D)+ -> A B (C D) {2,} EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -4507,8 +4497,26 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (7 rows) +-- cannot merge, prefix is different +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w +FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING +PATTERN (A B C D C (C D) +) +DEFINE A AS val <= 25, B AS val > 25, + C AS val > 50, D AS val > 75); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c d c (c d)+ + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + -- PREFIX merge with quantifiers: A B* (A B*)+ -> (a b*){2,} --- Tests mergeGroupPrefixSuffix: quantifier comparison in rprPatternEqual EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -4526,7 +4534,6 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING (7 rows) -- PREFIX merge with multiple quantifiers: A+ B* C? (A+ B* C?)+ -> (a+ b* c?){2,} --- Tests mergeGroupPrefixSuffix: complex quantifier patterns EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -4544,7 +4551,6 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING (7 rows) -- SUFFIX merge with quantifiers: (A B*)+ A B* -> (a b*){2,} --- Tests mergeGroupPrefixSuffix: suffix with quantifiers EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -4562,7 +4568,6 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING (7 rows) -- Unwrap GROUP{1,1}: ((A | B | C)) -> (a | b | c) --- Tests tryUnwrapGroup removing redundant outer GROUP EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -4816,6 +4821,23 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (7 rows) +-- PREFIX+SUFFIX merge (5-way): B A B (A B)+ A B A B -> b (a b){4,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (B A B (A B)+ A B A B) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: b (a b){4,} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + -- Unwrap single-item ALT after dedup: (A | A)+ -> a+ -- ALT dedup reduces to single-item, then GROUP unwrap EXPLAIN (COSTS OFF) @@ -7069,7 +7091,7 @@ WINDOW w AS ( (10 rows) -- Test: (A+ | (A | B)+)* - nested alternation inside quantified group --- Previously caused infinite recursion in nfa_advance_alt when the inner +-- Previously caused infinite recursion in alternation handling when the inner -- BEGIN(+)'s skip jump was followed as an ALT branch pointer. SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end FROM (VALUES diff --git a/src/test/regress/expected/rpr_explain.out b/src/test/regress/expected/rpr_explain.out index bcbf4f941ba..cc86d0aae30 100644 --- a/src/test/regress/expected/rpr_explain.out +++ b/src/test/regress/expected/rpr_explain.out @@ -541,7 +541,7 @@ WINDOW w AS ( (10 rows) -- Consecutive ALT merge followed by different ALT --- Tests mergeConsecutiveAlts flush on ALT change: (A|B){2} (C|D) +-- ((A | B) (A | B) (C | D)) -> (A|B){2} (C|D) CREATE VIEW rpr_ev_state_alt_merge_alt AS SELECT count(*) OVER w FROM generate_series(1, 40) AS s(v) @@ -582,7 +582,7 @@ WINDOW w AS ( (10 rows) -- Consecutive ALT merge followed by non-ALT element --- Tests mergeConsecutiveAlts flush on non-ALT: (A|B){2} c +-- ((A | B) (A | B) C) -> (A|B){2} C CREATE VIEW rpr_ev_state_alt_merge_nonalt AS SELECT count(*) OVER w FROM generate_series(1, 40) AS s(v) diff --git a/src/test/regress/expected/rpr_nfa.out b/src/test/regress/expected/rpr_nfa.out index d7146168f2b..1fc85a365f6 100644 --- a/src/test/regress/expected/rpr_nfa.out +++ b/src/test/regress/expected/rpr_nfa.out @@ -534,7 +534,6 @@ WINDOW w AS ( (11 rows) -- Consecutive vars merged to fixed-length: (A B B)+ -> (A B{2})+ --- mergeConsecutiveVars produces B{2}; now absorbable with fixed-length check WITH test_absorb_consecutive AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -1192,7 +1191,7 @@ WINDOW w AS ( (5 rows) -- Optional reluctant group: (A B)?? C --- nfa_advance_begin: reluctant tries skip first, but skip path needs C +-- Reluctant group entry tries skip first, but the skip path needs C -- at row 1 which is A -> skip fails. Enter path succeeds: A(1) B(2) C(3). WITH test_optional_reluctant AS ( SELECT * FROM (VALUES @@ -1225,8 +1224,8 @@ WINDOW w AS ( -- Non-leading reluctant optional VAR: (B A?? C) -- Reluctant A?? should prefer to skip, matching B(1) C(2) with A left -- unmatched (match_end 2). The leading/group reluctant cases above go through --- the begin path; this exercises the non-leading skip path in --- nfa_route_to_elem, which must honor reluctant ordering too. +-- the begin path; this exercises the non-leading skip path, +-- which must honor reluctant ordering too. WITH test_nonleading_reluctant AS ( SELECT * FROM (VALUES (1, ARRAY['B']), @@ -1256,8 +1255,8 @@ WINDOW w AS ( (3 rows) -- Reluctant outer quantifier over a nullable reluctant body: SQL/RPR --- semantics call for the shortest (empty) match. The count=2 boundary and single-quantifier controls localize the behaviour: only @@ -1290,7 +1289,7 @@ ORDER BY id; -- Non-leading reluctant optional GROUP with a follower: (B (A X)?? C) -- Like the VAR case above but a multi-element group; it goes through the --- begin path (nfa_advance_begin), which already honors reluctant ordering. +-- begin path, which already honors reluctant ordering. -- Reluctant (A X)?? should skip, matching B(1) C(2), with the group skipped -- to the following C (not to FIN). WITH test_nonleading_reluctant_group AS ( @@ -1357,7 +1356,7 @@ WINDOW w AS ( -- Reluctant optional group skip-to-FIN -- When a reluctant optional group's skip path reaches FIN, the group --- entry path is abandoned (execRPR.c nfa_advance_begin). +-- entry path is abandoned. -- Pattern: C (A B)?? -- after C matches, the reluctant group (A B)?? -- prefers to skip. Skip goes to FIN (group is last element), so -- the match completes with just C. @@ -2381,11 +2380,11 @@ WINDOW w AS ( 4 | {B,_} | | (4 rows) --- A{3,5}? B (reluctant bounded mid-band): the VAR-level count in --- nfa_advance_var cycles through 3, 4, 5 within a single match --- attempt. Exercises the count > 2 && reluctant && !isAbsorbable --- branch (absorbability analysis excludes reluctant quantifiers, so --- isAbsorbable stays false for A). +-- A{3,5}? B (reluctant bounded mid-band): the VAR-level count +-- cycles through 3, 4, 5 within a single match attempt. Exercises +-- a reluctant bounded quantifier that absorbability analysis excludes +-- (reluctant quantifiers are never absorbable, so A stays +-- non-absorbable). WITH test_reluctant_mid_band AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -3364,7 +3363,7 @@ WINDOW w AS ( -- Nested END->END fast-forward -- When an inner group has a nullable body and count < min, the -- fast-forward path exits through the outer END, incrementing --- the outer group's count (execRPR.c nfa_advance_end). +-- the outer group's count. -- Pattern: ((A?){2,3}){2,3} -- nested groups, neither collapses -- because the optimizer cannot safely multiply non-exact quantifiers. -- Data has no A rows, forcing all-empty iterations via fast-forward. @@ -4067,7 +4066,7 @@ WINDOW w AS ( -- Non-absorbable context during absorption -- Pattern (A B)+ C: A,B in absorbable group, C is not. --- When END exits to C via nfa_state_clone, isAbsorbable becomes false. +-- When END exits to C, the cloned context becomes non-absorbable. WITH test_non_absorbable AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -4102,9 +4101,9 @@ WINDOW w AS ( 6 | {_} | | (6 rows) --- Absorption flags early return (!hasAbsorbableState) +-- Absorption skipped when no absorbable state remains -- Pattern (A B)+ C D with SKIP PAST LAST ROW --- After reaching C (non-absorbable), hasAbsorbableState becomes false. +-- After reaching C (non-absorbable), no absorbable state remains. -- On next row (D), the early return fires. WITH test_absorption_early_return AS ( SELECT * FROM (VALUES @@ -4212,11 +4211,11 @@ WINDOW w AS ( 4 | {_} | | (4 rows) --- Absorb skips non-absorbable context (!hasAbsorbableState) +-- Absorb skips a context with no absorbable state -- Pattern A+ | B C with SKIP PAST LAST ROW (only A+ branch absorbable). -- Row 1: B only -> Ctx1 takes B branch (non-absorbable), advances to C. --- Row 2: C,A -> Ctx1 C matches (hasAbsorbableState=false). Ctx2 takes A (absorbable). --- Absorption: Ctx1 !hasAbsorbableState -> skip. +-- Row 2: C,A -> Ctx1 C matches (no absorbable state). Ctx2 takes A (absorbable). +-- Absorption: Ctx1 has no absorbable state -> skip. WITH test_older_non_absorbable AS ( SELECT * FROM (VALUES (1, ARRAY['B', '_']), diff --git a/src/test/regress/sql/rpr_base.sql b/src/test/regress/sql/rpr_base.sql index 5e95859f758..7bb45b6bcf4 100644 --- a/src/test/regress/sql/rpr_base.sql +++ b/src/test/regress/sql/rpr_base.sql @@ -2471,7 +2471,6 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN (A+ A*) DEFINE A AS val > 0); -- Consecutive VAR merge: A A+ -> a{2,} --- Exercises the child->max == RPR_QUANTITY_INF branch in mergeConsecutiveVars, -- where a finite prev (A{1,1}) meets an infinite child (A+). EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan @@ -2500,8 +2499,7 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A B)+ (A B)+) DEFINE A AS val <= 50, B AS val > 50); -- Consecutive GROUP merge: (A B){2} (A B)+ -> (a b){3,} --- Exercises the child->max == RPR_QUANTITY_INF branch in mergeConsecutiveGroups, --- where a finite prev ((A B){2,2}) meets an infinite child ((A B)+). +-- Where a finite prev ((A B){2,2}) meets an infinite child ((A B)+). EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -2722,21 +2720,18 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING DEFINE A AS val <= 20, B AS val > 20 AND val <= 70, C AS val > 70); -- Consecutive GROUP merge with unbounded: (A+) (A+) -> a{2,} --- Tests mergeConsecutiveGroups with child->max == INF EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A+) (A+)) DEFINE A AS val > 0); -- Consecutive GROUP merge finite: (A{10}){20} -> a{200} --- Tests mergeConsecutiveGroups with both finite EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A{10}){20}) DEFINE A AS val > 0); -- Different GROUP prevents merge: (A B){2} (C D){3} --- Tests mergeConsecutiveGroups flush previous EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -2745,7 +2740,6 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING C AS val > 50 AND val <= 75, D AS val > 75); -- Different children count prevents merge: (A B)+ (A B C)+ --- Tests rprPatternChildrenEqual length check EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -2753,29 +2747,25 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING DEFINE A AS val <= 33, B AS val > 33 AND val <= 66, C AS val > 66); -- PREFIX only merge: A B (A B)+ -> (a b){2,} --- Tests mergeGroupPrefixSuffix: absorb preceding elements into GROUP min EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN (A B (A B)+) DEFINE A AS val <= 50, B AS val > 50); -- SUFFIX only merge: (A B)+ A B -> (a b){2,} --- Tests mergeGroupPrefixSuffix: absorb following elements into GROUP min EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A B)+ A B) DEFINE A AS val <= 50, B AS val > 50); -- Multiple SUFFIX absorption with skipUntil: (A B)+ A B A B C --- Tests mergeGroupPrefixSuffix: skip absorbed suffix elements EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A B)+ A B A B C) DEFINE A AS val <= 50, B AS val > 50 AND val <= 75, C AS val > 75); --- PREFIX merge with remaining prefix: A B C D (C D)+ --- Tests mergeGroupPrefixSuffix: trimmed list reconstruction +-- PREFIX merge with remaining prefix: A B C D (C D)+ -> A B (C D) {2,} EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -2783,8 +2773,16 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, C AS val > 50 AND val <= 75, D AS val > 75); +-- cannot merge, prefix is different +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w +FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING +PATTERN (A B C D C (C D) +) +DEFINE A AS val <= 25, B AS val > 25, + C AS val > 50, D AS val > 75); + -- PREFIX merge with quantifiers: A B* (A B*)+ -> (a b*){2,} --- Tests mergeGroupPrefixSuffix: quantifier comparison in rprPatternEqual EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -2792,7 +2790,6 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING DEFINE A AS val <= 50, B AS val > 50); -- PREFIX merge with multiple quantifiers: A+ B* C? (A+ B* C?)+ -> (a+ b* c?){2,} --- Tests mergeGroupPrefixSuffix: complex quantifier patterns EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -2800,7 +2797,6 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING DEFINE A AS val <= 30, B AS val > 30 AND val <= 60, C AS val > 60); -- SUFFIX merge with quantifiers: (A B*)+ A B* -> (a b*){2,} --- Tests mergeGroupPrefixSuffix: suffix with quantifiers EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -2808,7 +2804,6 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING DEFINE A AS val <= 50, B AS val > 50); -- Unwrap GROUP{1,1}: ((A | B | C)) -> (a | b | c) --- Tests tryUnwrapGroup removing redundant outer GROUP EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -2909,6 +2904,13 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN (A B A B (A B)+ A B A B) DEFINE A AS val <= 50, B AS val > 50); +-- PREFIX+SUFFIX merge (5-way): B A B (A B)+ A B A B -> b (a b){4,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (B A B (A B)+ A B A B) + DEFINE A AS val <= 50, B AS val > 50); + -- Unwrap single-item ALT after dedup: (A | A)+ -> a+ -- ALT dedup reduces to single-item, then GROUP unwrap EXPLAIN (COSTS OFF) @@ -4303,7 +4305,7 @@ WINDOW w AS ( ); -- Test: (A+ | (A | B)+)* - nested alternation inside quantified group --- Previously caused infinite recursion in nfa_advance_alt when the inner +-- Previously caused infinite recursion in alternation handling when the inner -- BEGIN(+)'s skip jump was followed as an ALT branch pointer. SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end FROM (VALUES diff --git a/src/test/regress/sql/rpr_explain.sql b/src/test/regress/sql/rpr_explain.sql index aa78ffed260..297ca78d54b 100644 --- a/src/test/regress/sql/rpr_explain.sql +++ b/src/test/regress/sql/rpr_explain.sql @@ -365,7 +365,7 @@ WINDOW w AS ( );'); -- Consecutive ALT merge followed by different ALT --- Tests mergeConsecutiveAlts flush on ALT change: (A|B){2} (C|D) +-- ((A | B) (A | B) (C | D)) -> (A|B){2} (C|D) CREATE VIEW rpr_ev_state_alt_merge_alt AS SELECT count(*) OVER w FROM generate_series(1, 40) AS s(v) @@ -388,7 +388,7 @@ WINDOW w AS ( );'); -- Consecutive ALT merge followed by non-ALT element --- Tests mergeConsecutiveAlts flush on non-ALT: (A|B){2} c +-- ((A | B) (A | B) C) -> (A|B){2} C CREATE VIEW rpr_ev_state_alt_merge_nonalt AS SELECT count(*) OVER w FROM generate_series(1, 40) AS s(v) diff --git a/src/test/regress/sql/rpr_nfa.sql b/src/test/regress/sql/rpr_nfa.sql index 8daa0a73725..c82b8340977 100644 --- a/src/test/regress/sql/rpr_nfa.sql +++ b/src/test/regress/sql/rpr_nfa.sql @@ -408,7 +408,6 @@ WINDOW w AS ( ); -- Consecutive vars merged to fixed-length: (A B B)+ -> (A B{2})+ --- mergeConsecutiveVars produces B{2}; now absorbable with fixed-length check WITH test_absorb_consecutive AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -836,7 +835,7 @@ WINDOW w AS ( ); -- Optional reluctant group: (A B)?? C --- nfa_advance_begin: reluctant tries skip first, but skip path needs C +-- Reluctant group entry tries skip first, but the skip path needs C -- at row 1 which is A -> skip fails. Enter path succeeds: A(1) B(2) C(3). WITH test_optional_reluctant AS ( SELECT * FROM (VALUES @@ -863,8 +862,8 @@ WINDOW w AS ( -- Non-leading reluctant optional VAR: (B A?? C) -- Reluctant A?? should prefer to skip, matching B(1) C(2) with A left -- unmatched (match_end 2). The leading/group reluctant cases above go through --- the begin path; this exercises the non-leading skip path in --- nfa_route_to_elem, which must honor reluctant ordering too. +-- the begin path; this exercises the non-leading skip path, +-- which must honor reluctant ordering too. WITH test_nonleading_reluctant AS ( SELECT * FROM (VALUES (1, ARRAY['B']), @@ -888,8 +887,8 @@ WINDOW w AS ( ); -- Reluctant outer quantifier over a nullable reluctant body: SQL/RPR --- semantics call for the shortest (empty) match. The count=2 boundary and single-quantifier controls localize the behaviour: only @@ -915,7 +914,7 @@ ORDER BY id; -- Non-leading reluctant optional GROUP with a follower: (B (A X)?? C) -- Like the VAR case above but a multi-element group; it goes through the --- begin path (nfa_advance_begin), which already honors reluctant ordering. +-- begin path, which already honors reluctant ordering. -- Reluctant (A X)?? should skip, matching B(1) C(2), with the group skipped -- to the following C (not to FIN). WITH test_nonleading_reluctant_group AS ( @@ -968,7 +967,7 @@ WINDOW w AS ( -- Reluctant optional group skip-to-FIN -- When a reluctant optional group's skip path reaches FIN, the group --- entry path is abandoned (execRPR.c nfa_advance_begin). +-- entry path is abandoned. -- Pattern: C (A B)?? -- after C matches, the reluctant group (A B)?? -- prefers to skip. Skip goes to FIN (group is last element), so -- the match completes with just C. @@ -1667,11 +1666,11 @@ WINDOW w AS ( B AS 'B' = ANY(flags) ); --- A{3,5}? B (reluctant bounded mid-band): the VAR-level count in --- nfa_advance_var cycles through 3, 4, 5 within a single match --- attempt. Exercises the count > 2 && reluctant && !isAbsorbable --- branch (absorbability analysis excludes reluctant quantifiers, so --- isAbsorbable stays false for A). +-- A{3,5}? B (reluctant bounded mid-band): the VAR-level count +-- cycles through 3, 4, 5 within a single match attempt. Exercises +-- a reluctant bounded quantifier that absorbability analysis excludes +-- (reluctant quantifiers are never absorbable, so A stays +-- non-absorbable). WITH test_reluctant_mid_band AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -2406,7 +2405,7 @@ WINDOW w AS ( -- Nested END->END fast-forward -- When an inner group has a nullable body and count < min, the -- fast-forward path exits through the outer END, incrementing --- the outer group's count (execRPR.c nfa_advance_end). +-- the outer group's count. -- Pattern: ((A?){2,3}){2,3} -- nested groups, neither collapses -- because the optimizer cannot safely multiply non-exact quantifiers. -- Data has no A rows, forcing all-empty iterations via fast-forward. @@ -2954,7 +2953,7 @@ WINDOW w AS ( -- Non-absorbable context during absorption -- Pattern (A B)+ C: A,B in absorbable group, C is not. --- When END exits to C via nfa_state_clone, isAbsorbable becomes false. +-- When END exits to C, the cloned context becomes non-absorbable. WITH test_non_absorbable AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -2980,9 +2979,9 @@ WINDOW w AS ( C AS 'C' = ANY(flags) ); --- Absorption flags early return (!hasAbsorbableState) +-- Absorption skipped when no absorbable state remains -- Pattern (A B)+ C D with SKIP PAST LAST ROW --- After reaching C (non-absorbable), hasAbsorbableState becomes false. +-- After reaching C (non-absorbable), no absorbable state remains. -- On next row (D), the early return fires. WITH test_absorption_early_return AS ( SELECT * FROM (VALUES @@ -3065,11 +3064,11 @@ WINDOW w AS ( B AS 'B' = ANY(flags) ); --- Absorb skips non-absorbable context (!hasAbsorbableState) +-- Absorb skips a context with no absorbable state -- Pattern A+ | B C with SKIP PAST LAST ROW (only A+ branch absorbable). -- Row 1: B only -> Ctx1 takes B branch (non-absorbable), advances to C. --- Row 2: C,A -> Ctx1 C matches (hasAbsorbableState=false). Ctx2 takes A (absorbable). --- Absorption: Ctx1 !hasAbsorbableState -> skip. +-- Row 2: C,A -> Ctx1 C matches (no absorbable state). Ctx2 takes A (absorbable). +-- Absorption: Ctx1 has no absorbable state -> skip. WITH test_older_non_absorbable AS ( SELECT * FROM (VALUES (1, ARRAY['B', '_']), -- 2.50.1 (Apple Git-155)