-- -- Q4 failure when there are not stats present -- dbt3=# select * from pg_statistic where starelid = 'lineitem'::regclass; -- should return 0 rows (probably for others rels too) dbt3=# explain (costs off, plan_advice) SELECT o_orderpriority, count(*) AS order_count FROM orders WHERE o_orderdate >= DATE '1993-07-01' AND o_orderdate < DATE '1993-07-01' + INTERVAL '3' MONTH AND EXISTS ( SELECT * FROM lineitem WHERE l_orderkey = o_orderkey AND l_commitdate < l_receiptdate ) GROUP BY o_orderpriority ORDER BY o_orderpriority; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------ GroupAggregate Group Key: orders.o_orderpriority -> Sort Sort Key: orders.o_orderpriority -> Nested Loop -> Unique -> Sort Sort Key: lineitem.l_orderkey -> Gather Workers Planned: 2 -> HashAggregate Group Key: lineitem.l_orderkey -> Parallel Seq Scan on lineitem Filter: (l_commitdate < l_receiptdate) -> Index Scan using pk_orders on orders Index Cond: (o_orderkey = lineitem.l_orderkey) Filter: ((o_orderdate >= '1993-07-01'::date) AND (o_orderdate < '1993-10-01 00:00:00'::timestamp without time zone)) Generated Plan Advice: JOIN_ORDER(lineitem orders) NESTED_LOOP_PLAIN(orders) SEQ_SCAN(lineitem) INDEX_SCAN(orders public.pk_orders) GATHER(lineitem) SEMIJOIN_UNIQUE(lineitem) NO_GATHER(orders) (25 rows) -- apply it dbt3=# set pg_plan_advice.advice = 'JOIN_ORDER(lineitem orders) NESTED_LOOP_PLAIN(orders) SEQ_SCAN(lineitem) INDEX_SCAN(orders public.pk_orders) GATHER(lineitem) SEMIJOIN_UNIQUE(lineitem) NO_GATHER(orders)'; dbt3=# explain (costs off) SELECT o_orderpriority, count(*) AS order_count FROM orders WHERE o_orderdate >= DATE '1993-07-01' AND o_orderdate < DATE '1993-07-01' + INTERVAL '3' MONTH AND EXISTS ( SELECT * FROM lineitem WHERE l_orderkey = o_orderkey AND l_commitdate < l_receiptdate ) GROUP BY o_orderpriority ORDER BY o_orderpriority; -- HERE's the ERROR ERROR: unique semijoin found for relids (b 3) but not observed during planning -- but without advices it works dbt3=# set pg_plan_advice.advice = ''; SET dbt3=# explain (costs off) SELECT o_orderpriority, count(*) AS order_count FROM orders WHERE o_orderdate >= DATE '1993-07-01' AND o_orderdate < DATE '1993-07-01' + INTERVAL '3' MONTH AND EXISTS ( SELECT * FROM lineitem WHERE l_orderkey = o_orderkey AND l_commitdate < l_receiptdate ) GROUP BY o_orderpriority ORDER BY o_orderpriority; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------ GroupAggregate Group Key: orders.o_orderpriority -> Sort Sort Key: orders.o_orderpriority -> Nested Loop -> Unique -> Sort Sort Key: lineitem.l_orderkey -> Gather Workers Planned: 2 -> HashAggregate Group Key: lineitem.l_orderkey -> Parallel Seq Scan on lineitem Filter: (l_commitdate < l_receiptdate) -> Index Scan using pk_orders on orders Index Cond: (o_orderkey = lineitem.l_orderkey) Filter: ((o_orderdate >= '1993-07-01'::date) AND (o_orderdate < '1993-10-01 00:00:00'::timestamp without time zone)) (17 rows) dbt3=# -- now just gather stats for 1 table out of many dbt3=# analyze lineitem ; ANALYZE -- ensure adivce is set properly: dbt3=# set pg_plan_advice.advice = 'JOIN_ORDER(lineitem orders) NESTED_LOOP_PLAIN(orders) SEQ_SCAN(lineitem) INDEX_SCAN(orders public.pk_orders) GATHER(lineitem) SEMIJOIN_UNIQUE(lineitem) NO_GATHER(orders)'; SET -- and it works (so only we poured in lineitem stats, right?), we still get two failures though : dbt3=# explain (costs off) SELECT o_orderpriority, count(*) AS order_count FROM orders WHERE o_orderdate >= DATE '1993-07-01' AND o_orderdate < DATE '1993-07-01' + INTERVAL '3' MONTH AND EXISTS ( SELECT * FROM lineitem WHERE l_orderkey = o_orderkey AND l_commitdate < l_receiptdate ) GROUP BY o_orderpriority ORDER BY o_orderpriority; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------ GroupAggregate Group Key: orders.o_orderpriority -> Sort Sort Key: orders.o_orderpriority -> Hash Right Semi Join Hash Cond: (lineitem.l_orderkey = orders.o_orderkey) -> Gather Workers Planned: 2 -> Parallel Seq Scan on lineitem Filter: (l_commitdate < l_receiptdate) -> Hash -> Index Scan using pk_orders on orders Filter: ((o_orderdate >= '1993-07-01'::date) AND (o_orderdate < '1993-10-01 00:00:00'::timestamp without time zone)) Supplied Plan Advice: SEQ_SCAN(lineitem) /* matched */ INDEX_SCAN(orders public.pk_orders) /* matched */ JOIN_ORDER(lineitem orders) /* matched, conflicting */ NESTED_LOOP_PLAIN(orders) /* matched, conflicting, failed */ SEMIJOIN_UNIQUE(lineitem) /* matched, conflicting, failed */ GATHER(lineitem) /* matched */ NO_GATHER(orders) /* matched */ -- let's go back to without stats: SET allow_system_table_mods = ON; TRUNCATE pg_statistic; -- and grab new psql session and let's do again (same advice): dbt3=# set pg_plan_advice.advice = 'JOIN_ORDER(lineitem orders) NESTED_LOOP_PLAIN(orders) SEQ_SCAN(lineitem) INDEX_SCAN(orders public.pk_orders) GATHER(lineitem) SEMIJOIN_UNIQUE(lineitem) NO_GATHER(orders)'; SET dbt3=# explain (costs off) SELECT o_orderpriority, count(*) AS order_count FROM orders WHERE o_orderdate >= DATE '1993-07-01' AND o_orderdate < DATE '1993-07-01' + INTERVAL '3' MONTH AND EXISTS ( SELECT * FROM lineitem WHERE l_orderkey = o_orderkey AND l_commitdate < l_receiptdate ) GROUP BY o_orderpriority ORDER BY o_orderpriority; ERROR: unique semijoin found for relids (b 3) but not observed during planning dbt3=#