From e413583502bc7602ad1803fa3a8d74d265147002 Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Mon, 23 Oct 2023 12:31:51 -0700 Subject: [PATCH 1/2] squash! Row pattern recognition patch (tests). Add long-table tests. --- src/test/regress/expected/rpr.out | 68 +++++++++++++++++++++++++++++++ src/test/regress/sql/rpr.sql | 56 +++++++++++++++++++++++++ 2 files changed, 124 insertions(+) diff --git a/src/test/regress/expected/rpr.out b/src/test/regress/expected/rpr.out index 8f8254d3b2..806dee5ce6 100644 --- a/src/test/regress/expected/rpr.out +++ b/src/test/regress/expected/rpr.out @@ -635,6 +635,74 @@ DOWN AS price < PREV(price) company2 | 07-10-2023 | 1300 | | | | | | | 0 (20 rows) +-- +-- Bigger datasets +-- +CREATE TEMP TABLE long_stock ( + company TEXT, + tdate DATE, + price INTEGER +); +INSERT INTO long_stock SELECT 'company1', DATE '2023-07-01' + i, 200 + 100 * sin(i) + FROM generate_series(1, 1000) i; +INSERT INTO long_stock SELECT 'company2', DATE '2023-07-01' + i, 300 + 200 * sin(i + 3) + FROM generate_series(1, 1000) i; +SELECT company, count(*), min(price), round(avg(price)) AS avg, max(price) + FROM long_stock GROUP BY company; + company | count | min | avg | max +----------+-------+-----+-----+----- + company1 | 1000 | 100 | 200 | 300 + company2 | 1000 | 100 | 300 | 500 +(2 rows) + +-- long test using PREV. Expect approximately 1000 / (2*pi) = 159 periods of the +-- sinusoids to match. +WITH q AS ( + SELECT company, tdate, first_value(price) OVER w + FROM long_stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price >= PREV(price), + DOWN AS price <= PREV(price) + ) +) SELECT company, count(first_value) AS matches + FROM q GROUP BY company; + company | matches +----------+--------- + company1 | 159 + company2 | 159 +(2 rows) + +-- match everything, with multiple matching variables per row (stresses +-- implementations susceptible to Cartesian explosion) +WITH q AS ( + SELECT company, tdate, first_value(price) OVER w + FROM long_stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A+ B+ C+) + DEFINE + A AS TRUE, + B AS TRUE, + C AS TRUE + ) +) SELECT company, count(first_value) AS matches + FROM q GROUP BY company; + company | matches +----------+--------- + company1 | 1 + company2 | 1 +(2 rows) + -- -- Error cases -- diff --git a/src/test/regress/sql/rpr.sql b/src/test/regress/sql/rpr.sql index 38309652f9..51c9245d3b 100644 --- a/src/test/regress/sql/rpr.sql +++ b/src/test/regress/sql/rpr.sql @@ -271,6 +271,62 @@ UP AS price > PREV(price), DOWN AS price < PREV(price) ); +-- +-- Bigger datasets +-- + +CREATE TEMP TABLE long_stock ( + company TEXT, + tdate DATE, + price INTEGER +); + +INSERT INTO long_stock SELECT 'company1', DATE '2023-07-01' + i, 200 + 100 * sin(i) + FROM generate_series(1, 1000) i; +INSERT INTO long_stock SELECT 'company2', DATE '2023-07-01' + i, 300 + 200 * sin(i + 3) + FROM generate_series(1, 1000) i; + +SELECT company, count(*), min(price), round(avg(price)) AS avg, max(price) + FROM long_stock GROUP BY company; + +-- long test using PREV. Expect approximately 1000 / (2*pi) = 159 periods of the +-- sinusoids to match. +WITH q AS ( + SELECT company, tdate, first_value(price) OVER w + FROM long_stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price >= PREV(price), + DOWN AS price <= PREV(price) + ) +) SELECT company, count(first_value) AS matches + FROM q GROUP BY company; + +-- match everything, with multiple matching variables per row (stresses +-- implementations susceptible to Cartesian explosion) +WITH q AS ( + SELECT company, tdate, first_value(price) OVER w + FROM long_stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A+ B+ C+) + DEFINE + A AS TRUE, + B AS TRUE, + C AS TRUE + ) +) SELECT company, count(first_value) AS matches + FROM q GROUP BY company; + -- -- Error cases -- -- 2.39.2