From fa6e090e600acf527dc2e8877c4e61c698258409 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Tue, 19 May 2026 16:28:22 +0300 Subject: [PATCH v8] postgres_fdw: push down FUNCTION RTE into foreign joins A foreign join planning hook now considers a (foreign-table x function-RTE) INNER join as a push-down candidate when the function expression is IMMUTABLE and otherwise shippable. The remote query absorbs the function call as a FROM-list item (e.g. unnest(...) AS f(c1, c2, ...)), so the foreign side returns only rows that match the function-produced set and the join executes entirely on the remote. An IMMUTABLE function gives the same result on any server, so the same function RTE can be a push-down candidate for several distinct foreign servers without semantic risk. To keep the planner state consistent across those independent attempts, the per-call stub fpinfo for the function side lives on the joinrel's PgFdwRelationInfo (new outer_func_fpinfo / inner_func_fpinfo), never on the function rel itself, and the function side is detected via rtekind rather than fdw_private. set_foreign_rel_properties() propagates fdwroutine onto a joinrel that pairs a foreign rel with an RTE_FUNCTION rel so GetForeignJoinPaths gets called; the FDW retains full control over whether to actually generate a path. deparseRangeTblRef and deparseColumnRef gain a FUNCTION-RTE branch that emits the function expression and resolves Vars to the generated column aliases. --- contrib/postgres_fdw/deparse.c | 132 ++++- .../postgres_fdw/expected/postgres_fdw.out | 441 +++++++++++++++++ contrib/postgres_fdw/postgres_fdw.c | 461 +++++++++++++++++- contrib/postgres_fdw/postgres_fdw.h | 10 + contrib/postgres_fdw/sql/postgres_fdw.sql | 183 +++++++ src/backend/optimizer/util/relnode.c | 24 + 6 files changed, 1227 insertions(+), 24 deletions(-) diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index 2dcc6c8af1b..5567fdb8ed9 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -112,6 +112,7 @@ typedef struct deparse_expr_cxt appendStringInfo((buf), "%s%d.", REL_ALIAS_PREFIX, (varno)) #define SUBQUERY_REL_ALIAS_PREFIX "s" #define SUBQUERY_COL_ALIAS_PREFIX "c" +#define FUNCTION_REL_ALIAS_PREFIX "f" /* * Functions to determine whether an expression can be evaluated safely on @@ -2030,6 +2031,72 @@ deparseFromExprForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *foreignrel, } } +/* + * Deparse a FUNCTION RTE absorbed into a foreign join. The function call(s) + * are emitted as a FROM-list item: + * + * AS f(c1, c2, ..., cN) + * + * For multi-function RTEs (SQL ROWS FROM (f1(), f2(), ...)), each + * function call appears comma-separated inside ROWS FROM(...). Column + * aliases c1..cN cover the union of every function's columns, in the + * order they appear; that matches the column ordering of the RTE. + */ +static void +deparseFunctionRangeTblRef(StringInfo buf, PlannerInfo *root, + RelOptInfo *foreignrel, RelOptInfo *scanrel, + List **params_list) +{ + RangeTblEntry *rte = planner_rt_fetch(foreignrel->relid, root); + deparse_expr_cxt context; + ListCell *lc; + bool multi_func; + int total_cols = 0; + int i; + + Assert(rte->rtekind == RTE_FUNCTION); + Assert(!rte->funcordinality); + Assert(list_length(rte->functions) >= 1); + + multi_func = list_length(rte->functions) > 1; + + context.buf = buf; + context.root = root; + context.foreignrel = scanrel; + context.scanrel = scanrel; + context.params_list = params_list; + + if (multi_func) + appendStringInfoString(buf, "ROWS FROM ("); + + foreach(lc, rte->functions) + { + RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc); + + if (foreach_current_index(lc) > 0) + appendStringInfoString(buf, ", "); + deparseExpr((Expr *) rtfunc->funcexpr, &context); + total_cols += rtfunc->funccolcount; + } + + if (multi_func) + appendStringInfoChar(buf, ')'); + + /* Alias + generated column-name list. */ + appendStringInfo(buf, " %s%d", FUNCTION_REL_ALIAS_PREFIX, foreignrel->relid); + if (total_cols > 0) + { + appendStringInfoChar(buf, '('); + for (i = 1; i <= total_cols; i++) + { + if (i > 1) + appendStringInfoString(buf, ", "); + appendStringInfo(buf, "%s%d", SUBQUERY_COL_ALIAS_PREFIX, i); + } + appendStringInfoChar(buf, ')'); + } +} + /* * Append FROM clause entry for the given relation into buf. * Conditions from lower-level SEMI-JOINs are appended to additional_conds @@ -2040,7 +2107,22 @@ deparseRangeTblRef(StringInfo buf, PlannerInfo *root, RelOptInfo *foreignrel, bool make_subquery, Index ignore_rel, List **ignore_conds, List **additional_conds, List **params_list) { - PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private; + PgFdwRelationInfo *fpinfo; + + /* + * For a function RTE absorbed into a foreign join, deparse the function + * expression as a FROM-list item and return. The stub fpinfo set up by + * foreign_join_ok() may or may not be present here. + */ + if (foreignrel->rtekind == RTE_FUNCTION) + { + Assert(!make_subquery); + deparseFunctionRangeTblRef(buf, root, foreignrel, foreignrel, + params_list); + return; + } + + fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private; /* Should only be called in these cases. */ Assert(IS_SIMPLE_REL(foreignrel) || IS_JOIN_REL(foreignrel)); @@ -2712,6 +2794,54 @@ static void deparseColumnRef(StringInfo buf, int varno, int varattno, RangeTblEntry *rte, bool qualify_col) { + /* + * Function RTE columns: emit as f.c, matching the + * aliases generated by deparseFunctionRangeTblRef(). A whole-row Var + * (varattno == 0) is rendered as ROW(f.c1, ..., f.c) + * where N is the total column count of the function RTE (including all + * ROWS FROM (...) members). System attributes such as ctid have no + * meaning for function RTEs and are rejected. + */ + if (rte->rtekind == RTE_FUNCTION) + { + if (varattno == 0) + { + int ncols = list_length(rte->eref->colnames); + int i; + + if (qualify_col) + { + appendStringInfoString(buf, "CASE WHEN ("); + appendStringInfo(buf, "%s%d.", FUNCTION_REL_ALIAS_PREFIX, varno); + appendStringInfoString(buf, "*)::text IS NOT NULL THEN "); + } + + appendStringInfoString(buf, "ROW("); + for (i = 1; i <= ncols; i++) + { + if (i > 1) + appendStringInfoString(buf, ", "); + appendStringInfo(buf, "%s%d.%s%d", + FUNCTION_REL_ALIAS_PREFIX, varno, + SUBQUERY_COL_ALIAS_PREFIX, i); + } + appendStringInfoChar(buf, ')'); + + if (qualify_col) + appendStringInfoString(buf, " END"); + return; + } + + if (varattno < 0) + elog(ERROR, + "system attribute reference to a function RTE is not supported in foreign join pushdown"); + + if (qualify_col) + appendStringInfo(buf, "%s%d.", FUNCTION_REL_ALIAS_PREFIX, varno); + appendStringInfo(buf, "%s%d", SUBQUERY_COL_ALIAS_PREFIX, varattno); + return; + } + /* We support fetching the remote side's CTID and OID. */ if (varattno == SelfItemPointerAttributeNumber) { diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index e90289e4ab1..a626b5e5ace 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -2885,6 +2885,447 @@ SELECT t1.c1, t2.c2 FROM v4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c (10 rows) ALTER VIEW v4 OWNER TO regress_view_owner; +-- =================================================================== +-- Foreign-join with FUNCTION RTE pushdown (IMMUTABLE functions only) +-- =================================================================== +-- IMMUTABLE function: unnest of constant array can be pushed down +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, t1.c3 FROM ft1 t1, unnest(ARRAY[1, 5, 10, 100]::int[]) AS u(id) +WHERE t1.c1 = u.id ORDER BY t1.c1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------ + Sort + Output: t1.c1, t1.c3 + Sort Key: t1.c1 + -> Foreign Scan + Output: t1.c1, t1.c3 + Relations: (public.ft1 t1) INNER JOIN (Function u) + Remote SQL: SELECT r1."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN unnest('{1,5,10,100}'::integer[]) f2(c1) ON (((r1."C 1" = f2.c1)))) +(7 rows) + +SELECT t1.c1, t1.c3 FROM ft1 t1, unnest(ARRAY[1, 5, 10, 100]::int[]) AS u(id) +WHERE t1.c1 = u.id ORDER BY t1.c1; + c1 | c3 +-----+------- + 1 | 00001 + 5 | 00005 + 10 | 00010 + 100 | 00100 +(4 rows) + +-- IMMUTABLE function: generate_series with constant args +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1 FROM ft1 t1, generate_series(1, 4) AS g(id) +WHERE t1.c1 = g.id ORDER BY t1.c1; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------- + Sort + Output: t1.c1 + Sort Key: t1.c1 + -> Foreign Scan + Output: t1.c1 + Relations: (public.ft1 t1) INNER JOIN (Function g) + Remote SQL: SELECT r1."C 1" FROM ("S 1"."T 1" r1 INNER JOIN generate_series(1, 4) f2(c1) ON (((r1."C 1" = f2.c1)))) +(7 rows) + +SELECT t1.c1 FROM ft1 t1, generate_series(1, 4) AS g(id) +WHERE t1.c1 = g.id ORDER BY t1.c1; + c1 +---- + 1 + 2 + 3 + 4 +(4 rows) + +-- VOLATILE function (random) must NOT be pushed down +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1 FROM ft1 t1, generate_series(1, 4 + (random() * 0)::int) AS g(id) +WHERE t1.c1 = g.id; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Hash Join + Output: t1.c1 + Hash Cond: (g.id = t1.c1) + -> Function Scan on pg_catalog.generate_series g + Output: g.id + Function Call: generate_series(1, (4 + ((random() * '0'::double precision))::integer)) + -> Hash + Output: t1.c1 + -> Foreign Scan on public.ft1 t1 + Output: t1.c1 + Remote SQL: SELECT "C 1" FROM "S 1"."T 1" +(11 rows) + +-- WITH ORDINALITY must NOT be pushed down (limitation of this implementation) +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, u.ord +FROM ft1 t1, unnest(ARRAY[1, 5, 10]::int[]) WITH ORDINALITY AS u(id, ord) +WHERE t1.c1 = u.id; + QUERY PLAN +------------------------------------------------------------ + Hash Join + Output: t1.c1, u.ord + Hash Cond: (t1.c1 = u.id) + -> Foreign Scan on public.ft1 t1 + Output: t1.c1 + Remote SQL: SELECT "C 1" FROM "S 1"."T 1" + -> Hash + Output: u.ord, u.id + -> Function Scan on pg_catalog.unnest u + Output: u.ord, u.id + Function Call: unnest('{1,5,10}'::integer[]) +(11 rows) + +-- Same function RTE joined with two different foreign servers: planner picks +-- one absorption + a local join with the second foreign server. The fact +-- that the function is IMMUTABLE makes this safe even if both sides chose to +-- absorb it independently. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, t2.c1 +FROM ft1 t1, ft6 t2, unnest(ARRAY[1, 5, 10, 100]::int[]) AS u(id) +WHERE t1.c1 = u.id AND t2.c1 = u.id ORDER BY t1.c1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------ + Merge Join + Output: t1.c1, t2.c1 + Merge Cond: (t1.c1 = u.id) + -> Foreign Scan on public.ft1 t1 + Output: t1.c1 + Remote SQL: SELECT "C 1" FROM "S 1"."T 1" ORDER BY "C 1" ASC NULLS LAST + -> Sort + Output: t2.c1, u.id + Sort Key: t2.c1 + -> Foreign Scan + Output: t2.c1, u.id + Relations: (public.ft6 t2) INNER JOIN (Function u) + Remote SQL: SELECT r2.c1, f3.c1 FROM ("S 1"."T 4" r2 INNER JOIN unnest('{1,5,10,100}'::integer[]) f3(c1) ON (((r2.c1 = f3.c1)))) +(13 rows) + +-- Cost-based selection between two foreign servers: ft1 ("S 1"."T 1") has +-- 1000 rows, ft6 ("S 1"."T 4") has ~33 rows. The same query shape gets a +-- different push-down target depending on a predicate that changes the +-- effective cardinality of one side -- the function "jumps" to whichever +-- foreign scan benefits more from being pre-filtered. +ANALYZE ft1; +ANALYZE ft6; +-- No extra predicate: ft1 is the bigger side, function absorbed there. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, t2.c1 +FROM ft1 t1, ft6 t2, unnest(ARRAY[3, 6, 9, 12, 15, 18]::int[]) AS u(id) +WHERE t1.c1 = u.id AND t2.c1 = u.id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------- + Hash Join + Output: t1.c1, t2.c1 + Hash Cond: (t1.c1 = t2.c1) + -> Foreign Scan + Output: t1.c1, u.id + Relations: (public.ft1 t1) INNER JOIN (Function u) + Remote SQL: SELECT r1."C 1", f3.c1 FROM ("S 1"."T 1" r1 INNER JOIN unnest('{3,6,9,12,15,18}'::integer[]) f3(c1) ON (((r1."C 1" = f3.c1)))) + -> Hash + Output: t2.c1 + -> Foreign Scan on public.ft6 t2 + Output: t2.c1 + Remote SQL: SELECT c1 FROM "S 1"."T 4" +(12 rows) + +-- Selective predicate on ft1.c3 (not in the eqclass) shrinks ft1 to a +-- handful of remote rows; now ft6 is effectively the bigger side and the +-- function is absorbed into ft6 instead. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, t2.c1 +FROM ft1 t1, ft6 t2, unnest(ARRAY[3, 6, 9, 12, 15, 18]::int[]) AS u(id) +WHERE t1.c1 = u.id AND t2.c1 = u.id AND t1.c3 < '00010'; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------- + Nested Loop + Output: t1.c1, t2.c1 + Join Filter: (t1.c1 = u.id) + -> Foreign Scan on public.ft1 t1 + Output: t1.c1 + Remote SQL: SELECT "C 1" FROM "S 1"."T 1" WHERE ((c3 < '00010')) + -> Materialize + Output: t2.c1, u.id + -> Foreign Scan + Output: t2.c1, u.id + Relations: (public.ft6 t2) INNER JOIN (Function u) + Remote SQL: SELECT r2.c1, f3.c1 FROM ("S 1"."T 4" r2 INNER JOIN unnest('{3,6,9,12,15,18}'::integer[]) f3(c1) ON (((r2.c1 = f3.c1)))) +(12 rows) + +-- The remaining scenarios reuse a dedicated foreign table to cover the +-- corner cases of FUNCTION RTE push-down: function-first FROM, record +-- return type rejection, whole-row reference, UPDATE...FROM..., and +-- multi-function ROWS FROM. +CREATE TABLE base_tbl_fn (a int, b int); +INSERT INTO base_tbl_fn + SELECT g, g + 100 FROM generate_series(1, 30) g; +CREATE FOREIGN TABLE remote_tbl (a int, b int) + SERVER loopback OPTIONS (table_name 'base_tbl_fn'); +-- The function RTE appears first in FROM; the foreign relation must be +-- found by scanning fs_base_relids for an RTE_RELATION rather than +-- using scan->fs_relid blindly. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM unnest(array[2, 3, 4]) n, remote_tbl r WHERE r.a = n; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------ + Foreign Scan + Output: n.n, r.a, r.b + Relations: (Function n) INNER JOIN (public.remote_tbl r) + Remote SQL: SELECT f1.c1, r2.a, r2.b FROM (unnest('{2,3,4}'::integer[]) f1(c1) INNER JOIN public.base_tbl_fn r2 ON (((f1.c1 = r2.a)))) +(4 rows) + +SELECT * FROM unnest(array[2, 3, 4]) n, remote_tbl r +WHERE r.a = n ORDER BY r.a; + n | a | b +---+---+----- + 2 | 2 | 102 + 3 | 3 | 103 + 4 | 4 | 104 +(3 rows) + +-- A function returning record (composite) is forbidden; pushing it +-- would yield a remote "column definition list is required" error. +-- function_rte_pushdown_ok() rejects it via TYPEFUNC_SCALAR. +CREATE OR REPLACE FUNCTION f_ret_record() RETURNS record AS $$ + SELECT (1, 2)::record +$$ LANGUAGE SQL IMMUTABLE; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s FROM remote_tbl rt, f_ret_record() AS s(a int, b int) +WHERE s.a = rt.a; + QUERY PLAN +------------------------------------------------------ + Hash Join + Output: s.* + Hash Cond: (rt.a = s.a) + -> Foreign Scan on public.remote_tbl rt + Output: rt.a, rt.b + Remote SQL: SELECT a FROM public.base_tbl_fn + -> Hash + Output: s.*, s.a + -> Function Scan on public.f_ret_record s + Output: s.*, s.a + Function Call: f_ret_record() +(11 rows) + +DROP FUNCTION f_ret_record(); +-- UPDATE ... FROM unnest() with a complex element type; area(box) +-- yields the value to match. The locking machinery for the +-- non-relation RTE generates a whole-row Var that deparseColumnRef +-- emits as ROW(f.c1, ...). +UPDATE remote_tbl r SET b = 999 +FROM unnest(array[box '((2,3),(-2,-3))']) AS t(bx) +WHERE r.a = area(t.bx); +SELECT * FROM remote_tbl WHERE a IN (23, 24, 25) ORDER BY a; + a | b +----+----- + 23 | 123 + 24 | 999 + 25 | 125 +(3 rows) + +-- The same shape with CASE and RETURNING; exercises the DirectModify +-- path and the executor's tuple-desc reconstruction. +UPDATE remote_tbl r + SET b = CASE WHEN random() >= 0 THEN 5 ELSE 0 END + FROM unnest(array[box '((2,3),(-2,-3))']) AS t(bx) + WHERE r.a = area(t.bx) RETURNING a, b; + a | b +----+--- + 24 | 5 +(1 row) + +-- RETURNING a whole-row reference to the function RTE. Without the +-- per-RTE function metadata being preserved on the DirectModify path +-- (FdwDirectModifyPrivateFunctions / FdwDirectModifyPrivateMinRTIndex), +-- this would fail with "input of anonymous composite types is not +-- implemented". +UPDATE remote_tbl r SET b = 7 + FROM unnest(array[box '((2,3),(-2,-3))'], + array[int '1']) AS t(bx, i) + WHERE r.a = area(t.bx) RETURNING a, b, t; + a | b | t +----+---+--------------------- + 24 | 7 | ("(2,3),(-2,-3)",1) +(1 row) + +-- ROWS FROM (...) with several functions. Force pushdown because the +-- cost model otherwise picks a local plan. +SET enable_hashjoin = off; +SET enable_mergejoin = off; +SET enable_nestloop = off; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT r.a, t.n, t.s + FROM remote_tbl r, ROWS FROM (unnest(array[3, 6, 9]), + generate_series(11, 13)) AS t(n, s) + WHERE r.a = t.n ORDER BY r.a; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Sort + Output: r.a, t.n, t.s + Sort Key: r.a + -> Foreign Scan + Output: r.a, t.n, t.s + Relations: (public.remote_tbl r) INNER JOIN (Function t) + Remote SQL: SELECT r1.a, f2.c1, f2.c2 FROM (public.base_tbl_fn r1 INNER JOIN ROWS FROM (unnest('{3,6,9}'::integer[]), generate_series(11, 13)) f2(c1, c2) ON (((r1.a = f2.c1)))) +(7 rows) + +SELECT r.a, t.n, t.s + FROM remote_tbl r, ROWS FROM (unnest(array[3, 6, 9]), + generate_series(11, 13)) AS t(n, s) + WHERE r.a = t.n ORDER BY r.a; + a | n | s +---+---+---- + 3 | 3 | 11 + 6 | 6 | 12 + 9 | 9 | 13 +(3 rows) + +-- Whole-row Var on the absorbed function side (e.g. via a cast to +-- text). Exercises both deparseColumnRef whole-row branch and the +-- get_tupdesc_for_join_scan_tuples() RTE_FUNCTION metadata path. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t::text, r.a + FROM remote_tbl r, ROWS FROM (unnest(array[3, 6, 9]), + generate_series(11, 13)) AS t(n, s) + WHERE r.a = t.n ORDER BY r.a; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Sort + Output: ((t.*)::text), r.a + Sort Key: r.a + -> Foreign Scan + Output: (t.*)::text, r.a + Relations: (public.remote_tbl r) INNER JOIN (Function t) + Remote SQL: SELECT CASE WHEN (f2.*)::text IS NOT NULL THEN ROW(f2.c1, f2.c2) END, r1.a FROM (public.base_tbl_fn r1 INNER JOIN ROWS FROM (unnest('{3,6,9}'::integer[]), generate_series(11, 13)) f2(c1, c2) ON (((r1.a = f2.c1)))) +(7 rows) + +SELECT t::text, r.a + FROM remote_tbl r, ROWS FROM (unnest(array[3, 6, 9]), + generate_series(11, 13)) AS t(n, s) + WHERE r.a = t.n ORDER BY r.a; + t | a +--------+--- + (3,11) | 3 + (6,12) | 6 + (9,13) | 9 +(3 rows) + +-- Check whole-row Var represented by function on the nullable +-- left join side +EXPLAIN (VERBOSE, COSTS OFF) +SELECT u, ft5.c1, ft5.c2, ft4.c1, ft4.c2 FROM ft5 LEFT JOIN +(ft4 JOIN unnest(array[1,2], array[3,4]) as u (u1, u2) ON ft4.c1=u1) +ON ft5.c1 = ft4.c1 WHERE ft5.c1 < 10 ORDER BY ft5.c1; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: u.*, ft5.c1, ft5.c2, ft4.c1, ft4.c2 + Relations: (public.ft5) LEFT JOIN ((public.ft4) INNER JOIN (Function u)) + Remote SQL: SELECT CASE WHEN (f3.*)::text IS NOT NULL THEN ROW(f3.c1, f3.c2) END, r1.c1, r1.c2, r2.c1, r2.c2 FROM ("S 1"."T 4" r1 LEFT JOIN ("S 1"."T 3" r2 INNER JOIN ROWS FROM (unnest('{1,2}'::integer[]), unnest('{3,4}'::integer[])) f3(c1, c2) ON (((r2.c1 = f3.c1)))) ON (((r1.c1 = r2.c1)))) WHERE ((r1.c1 < 10)) ORDER BY r1.c1 ASC NULLS LAST +(4 rows) + +SELECT u, ft5.c1, ft5.c2, ft4.c1, ft4.c2 FROM ft5 LEFT JOIN +(ft4 JOIN unnest(array[1,2], array[3,4]) as u (u1, u2) ON ft4.c1=u1) +ON ft5.c1 = ft4.c1 WHERE ft5.c1 < 10 ORDER BY ft5.c1; + u | c1 | c2 | c1 | c2 +---+----+----+----+---- + | 3 | 4 | | + | 6 | 7 | | + | 9 | 10 | | +(3 rows) + +RESET enable_hashjoin; +RESET enable_mergejoin; +RESET enable_nestloop; +-- Volatile function in ROWS FROM disqualifies the whole RTE. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT r.a FROM remote_tbl r, + ROWS FROM (unnest(array[3, 6, 9]), + generate_series(1, 4 + (random() * 0)::int)) AS t(n, s) + WHERE r.a = t.n; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------ + Merge Join + Output: r.a + Merge Cond: (t.n = r.a) + -> Sort + Output: t.n + Sort Key: t.n + -> Function Scan on t + Output: t.n + Function Call: unnest('{3,6,9}'::integer[]), generate_series(1, (4 + ((random() * '0'::double precision))::integer)) + -> Sort + Output: r.a + Sort Key: r.a + -> Foreign Scan on public.remote_tbl r + Output: r.a + Remote SQL: SELECT a FROM public.base_tbl_fn +(15 rows) + +-- LATERAL function referencing a foreign Var: postgres_fdw's +-- foreign-join pushdown rejects this via the joinrel->lateral_relids +-- check; the plan is therefore a local NestLoop + FunctionScan. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT r.a, t.x FROM remote_tbl r, LATERAL unnest(array[r.a]) AS t(x) +WHERE r.a <= 3 ORDER BY r.a; + QUERY PLAN +------------------------------------------------------------------------------------------------- + Nested Loop + Output: r.a, t.x + -> Foreign Scan on public.remote_tbl r + Output: r.a, r.b + Remote SQL: SELECT a FROM public.base_tbl_fn WHERE ((a <= 3)) ORDER BY a ASC NULLS LAST + -> Function Scan on pg_catalog.unnest t + Output: t.x + Function Call: unnest(ARRAY[r.a]) +(8 rows) + +-- Outer joins with the function RTE are not pushed down (only INNER +-- joins are supported by function_rte_pushdown_ok()). +EXPLAIN (VERBOSE, COSTS OFF) +SELECT r.a, t.n FROM remote_tbl r + LEFT JOIN unnest(array[1, 2, 3]) AS t(n) ON r.a = t.n + WHERE r.a <= 5 ORDER BY r.a; + QUERY PLAN +------------------------------------------------------------------------------------------------- + Merge Left Join + Output: r.a, t.n + Merge Cond: (r.a = t.n) + -> Foreign Scan on public.remote_tbl r + Output: r.a, r.b + Remote SQL: SELECT a FROM public.base_tbl_fn WHERE ((a <= 5)) ORDER BY a ASC NULLS LAST + -> Sort + Output: t.n + Sort Key: t.n + -> Function Scan on pg_catalog.unnest t + Output: t.n + Function Call: unnest('{1,2,3}'::integer[]) +(12 rows) + +-- SEMI join (EXISTS) with a function RTE is also not pushed down. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT r.a FROM remote_tbl r + WHERE EXISTS (SELECT 1 FROM unnest(array[3, 6, 9]) AS t(n) WHERE t.n = r.a) + ORDER BY r.a; + QUERY PLAN +-------------------------------------------------------------------------------- + Merge Semi Join + Output: r.a + Merge Cond: (r.a = t.n) + -> Foreign Scan on public.remote_tbl r + Output: r.a, r.b + Remote SQL: SELECT a FROM public.base_tbl_fn ORDER BY a ASC NULLS LAST + -> Sort + Output: t.n + Sort Key: t.n + -> Function Scan on pg_catalog.unnest t + Output: t.n + Function Call: unnest('{3,6,9}'::integer[]) +(12 rows) + +DROP FOREIGN TABLE remote_tbl; +DROP TABLE base_tbl_fn; -- ==================================================================== -- Check that userid to use when querying the remote table is correctly -- propagated into foreign rels present in subqueries under an UNION ALL diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 0a589f8db74..90d24ac4d56 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -31,6 +31,7 @@ #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/appendinfo.h" +#include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/inherit.h" #include "optimizer/optimizer.h" @@ -88,6 +89,22 @@ enum FdwScanPrivateIndex * of join, added when the scan is join */ FdwScanPrivateRelations, + + /* + * List of per-RTE function metadata, indexed by base RTI offset. Each + * element is either NULL (for non-RTE_FUNCTION rels in this scan) or a + * list of three-element lists (funcid, funcrettype, funccollation) -- one + * inner list per function in the RTE. Allows the executor to rebuild + * TupleDesc entries for whole-row references to function RTEs. + */ + FdwScanPrivateFunctions, + + /* + * Integer node: minimum base RT index covered by the scan, used to + * translate scan-local indexes to estate-rtable indexes after setrefs.c + * flattens rtables. + */ + FdwScanPrivateMinRTIndex, }; /* @@ -124,6 +141,11 @@ enum FdwModifyPrivateIndex * 2) Boolean flag showing if the remote query has a RETURNING clause * 3) Integer list of attribute numbers retrieved by RETURNING, if any * 4) Boolean flag showing if we set the command es_processed + * 5) Per-RTE function metadata (mirrors FdwScanPrivateFunctions; lets + * the executor rebuild TupleDesc entries for whole-row Vars over + * function RTEs absorbed into a foreign join) + * 6) Integer node: minimum base RT index of the scan (mirrors + * FdwScanPrivateMinRTIndex) */ enum FdwDirectModifyPrivateIndex { @@ -135,6 +157,10 @@ enum FdwDirectModifyPrivateIndex FdwDirectModifyPrivateRetrievedAttrs, /* set-processed flag (as a Boolean node) */ FdwDirectModifyPrivateSetProcessed, + /* Per-RTE function metadata, indexed by base RTI offset */ + FdwDirectModifyPrivateFunctions, + /* Integer node: minimum base RT index in the scan */ + FdwDirectModifyPrivateMinRTIndex, }; /* @@ -741,6 +767,9 @@ static bool foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel, static List *get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel); static List *get_useful_ecs_for_relation(PlannerInfo *root, RelOptInfo *rel); +static Relids get_base_relids(PlannerInfo *root, RelOptInfo *rel); +static int get_min_base_rti(PlannerInfo *root, RelOptInfo *rel); +static List *get_functions_data(PlannerInfo *root, RelOptInfo *rel); static void add_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel, Path *epq_path, List *restrictlist); static void add_foreign_grouping_paths(PlannerInfo *root, @@ -1635,9 +1664,26 @@ postgresGetForeignPlan(PlannerInfo *root, fdw_private = list_make3(makeString(sql.data), retrieved_attrs, makeInteger(fpinfo->fetch_size)); + + /* + * Position FdwScanPrivateRelations: either the EXPLAIN relation string + * (joins/upper rels) or a NULL placeholder, so that subsequent indexes + * stay valid for the base-rel scan case. + */ if (IS_JOIN_REL(foreignrel) || IS_UPPER_REL(foreignrel)) fdw_private = lappend(fdw_private, makeString(fpinfo->relation_name)); + else + fdw_private = lappend(fdw_private, NULL); + + /* + * FdwScanPrivateFunctions / FdwScanPrivateMinRTIndex carry the metadata + * the executor needs to rebuild TupleDesc entries for whole-row Vars + * pointing at RTE_FUNCTION rels absorbed into the foreign scan. + */ + fdw_private = lappend(fdw_private, get_functions_data(root, foreignrel)); + fdw_private = lappend(fdw_private, + makeInteger(get_min_base_rti(root, foreignrel))); /* * Create the ForeignScan node for the given relation. @@ -1658,9 +1704,15 @@ postgresGetForeignPlan(PlannerInfo *root, /* * Construct a tuple descriptor for the scan tuples handled by a foreign join. + * + * 'rtfuncdata' is the FdwScanPrivateFunctions list saved at plan time, and + * 'rtoffset' is the difference between the executor's RT indexes and the + * scan-local RT indexes captured in that list. Both may be 0/NIL when the + * scan has no RTE_FUNCTION dependents. */ static TupleDesc -get_tupdesc_for_join_scan_tuples(ForeignScanState *node) +get_tupdesc_for_join_scan_tuples(ForeignScanState *node, + List *rtfuncdata, int rtoffset) { ForeignScan *fsplan = (ForeignScan *) node->ss.ps.plan; EState *estate = node->ss.ps.state; @@ -1696,13 +1748,68 @@ get_tupdesc_for_join_scan_tuples(ForeignScanState *node) if (!IsA(var, Var) || var->varattno != 0) continue; rte = list_nth(estate->es_range_table, var->varno - 1); - if (rte->rtekind != RTE_RELATION) - continue; - reltype = get_rel_type_id(rte->relid); - if (!OidIsValid(reltype)) - continue; - att->atttypid = reltype; - /* shouldn't need to change anything else */ + + if (rte->rtekind == RTE_RELATION) + { + reltype = get_rel_type_id(rte->relid); + if (!OidIsValid(reltype)) + continue; + att->atttypid = reltype; + /* shouldn't need to change anything else */ + } + else if (rte->rtekind == RTE_FUNCTION) + { + /* + * A whole-row Var points at a FUNCTION RTE absorbed into the + * foreign join. Synthesize an anonymous composite TupleDesc from + * the per-function return-type metadata we saved at plan time; + * the deparser emits these as ROW(f.c1, f.c2, ...). + * + * For an upperrel scan (the only path that reaches here) + * postgresGetForeignPlan always builds rtfuncdata via + * get_functions_data(), so it is never NIL; the per-RTE slot for + * the function RTE referenced by this Var must likewise be + * populated. + */ + List *funcdata; + TupleDesc rte_tupdesc; + int num_funcs; + int attnum; + ListCell *lc1, + *lc2; + + Assert(rtfuncdata != NIL); + funcdata = list_nth(rtfuncdata, var->varno - rtoffset); + Assert(funcdata != NIL); + num_funcs = list_length(funcdata); + Assert(num_funcs == list_length(rte->eref->colnames)); + rte_tupdesc = CreateTemplateTupleDesc(num_funcs); + + attnum = 1; + forboth(lc1, funcdata, lc2, rte->eref->colnames) + { + List *fdata = lfirst_node(List, lc1); + char *colname = strVal(lfirst(lc2)); + Oid funcrettype; + Oid funccollation; + + funcrettype = lsecond_node(Integer, fdata)->ival; + funccollation = lthird_node(Integer, fdata)->ival; + + if (!OidIsValid(funcrettype) || funcrettype == RECORDOID) + elog(ERROR, + "could not determine return type for function in foreign scan"); + + TupleDescInitEntry(rte_tupdesc, (AttrNumber) attnum, colname, + funcrettype, -1, 0); + TupleDescInitEntryCollation(rte_tupdesc, (AttrNumber) attnum, + funccollation); + attnum++; + } + + assign_record_type_typmod(rte_tupdesc); + att->atttypmod = rte_tupdesc->tdtypmod; + } } return tupdesc; } @@ -1738,14 +1845,30 @@ postgresBeginForeignScan(ForeignScanState *node, int eflags) /* * Identify which user to do the remote access as. This should match what - * ExecCheckPermissions() does. + * ExecCheckPermissions() does. For a join, scan the base relids until we + * find an RTE_RELATION (the foreign-table side); ignore any RTE_FUNCTION + * absorbed into the join, which contributes no relation OID to look up. */ userid = OidIsValid(fsplan->checkAsUser) ? fsplan->checkAsUser : GetUserId(); + rte = NULL; if (fsplan->scan.scanrelid > 0) + { rtindex = fsplan->scan.scanrelid; + rte = exec_rt_fetch(rtindex, estate); + } else - rtindex = bms_next_member(fsplan->fs_base_relids, -1); - rte = exec_rt_fetch(rtindex, estate); + { + rtindex = -1; + while ((rtindex = bms_next_member(fsplan->fs_base_relids, rtindex)) >= 0) + { + rte = exec_rt_fetch(rtindex, estate); + if (rte != NULL && rte->rtekind == RTE_RELATION) + break; + rte = NULL; + } + if (rte == NULL) + elog(ERROR, "could not locate a foreign relation RTE in foreign scan"); + } /* Get info about foreign table. */ table = GetForeignTable(rte->relid); @@ -1788,8 +1911,19 @@ postgresBeginForeignScan(ForeignScanState *node, int eflags) } else { + List *rtfuncdata = (List *) list_nth(fsplan->fdw_private, + FdwScanPrivateFunctions); + int min_base_rti = intVal(list_nth(fsplan->fdw_private, + FdwScanPrivateMinRTIndex)); + int rtoffset = bms_next_member(fsplan->fs_base_relids, -1) - + min_base_rti; + + Assert(min_base_rti > 0); + Assert(rtoffset >= 0); + fsstate->rel = NULL; - fsstate->tupdesc = get_tupdesc_for_join_scan_tuples(node); + fsstate->tupdesc = get_tupdesc_for_join_scan_tuples(node, rtfuncdata, + rtoffset); } fsstate->attinmeta = TupleDescGetAttInMetadata(fsstate->tupdesc); @@ -2829,6 +2963,10 @@ postgresPlanDirectModify(PlannerInfo *root, makeBoolean((retrieved_attrs != NIL)), retrieved_attrs, makeBoolean(plan->canSetTag)); + fscan->fdw_private = lappend(fscan->fdw_private, + get_functions_data(root, foreignrel)); + fscan->fdw_private = lappend(fscan->fdw_private, + makeInteger(get_min_base_rti(root, foreignrel))); /* * Update the foreign-join-related fields. @@ -2942,7 +3080,26 @@ postgresBeginDirectModify(ForeignScanState *node, int eflags) TupleDesc tupdesc; if (fsplan->scan.scanrelid == 0) - tupdesc = get_tupdesc_for_join_scan_tuples(node); + { + /* + * DirectModify on a foreign join: use the per-RTE function + * metadata saved at plan time so a whole-row Var pointing at a + * function RTE absorbed into the join can be rebuilt into a + * usable TupleDesc (e.g. RETURNING t for a join with UNNEST(..., + * ...) AS t(bx, i)). + */ + List *rtfuncdata = (List *) list_nth(fsplan->fdw_private, + FdwDirectModifyPrivateFunctions); + int min_base_rti = intVal(list_nth(fsplan->fdw_private, + FdwDirectModifyPrivateMinRTIndex)); + int rtoffset = bms_next_member(fsplan->fs_base_relids, -1) - + min_base_rti; + + Assert(min_base_rti > 0); + Assert(rtoffset >= 0); + + tupdesc = get_tupdesc_for_join_scan_tuples(node, rtfuncdata, rtoffset); + } else tupdesc = RelationGetDescr(dmstate->rel); @@ -3055,7 +3212,8 @@ postgresExplainForeignScan(ForeignScanState *node, ExplainState *es) * We do that here, not when the plan is created, because we can't know * what aliases ruleutils.c will assign at plan creation time. */ - if (list_length(fdw_private) > FdwScanPrivateRelations) + if (list_length(fdw_private) > FdwScanPrivateRelations && + list_nth(fdw_private, FdwScanPrivateRelations) != NULL) { StringInfoData relations; char *rawrelations; @@ -3105,6 +3263,22 @@ postgresExplainForeignScan(ForeignScanState *node, ExplainState *es) rti += rtoffset; Assert(bms_is_member(rti, plan->fs_base_relids)); rte = rt_fetch(rti, es->rtable); + + /* + * If a function RTE was absorbed into the foreign join, + * render it as "Function " since we have no foreign + * relid. + */ + if (rte->rtekind == RTE_FUNCTION) + { + refname = (char *) list_nth(es->rtable_names, rti - 1); + if (refname == NULL) + refname = rte->eref->aliasname; + appendStringInfo(&relations, "Function %s", + quote_identifier(refname)); + continue; + } + Assert(rte->rtekind == RTE_RELATION); /* This logic should agree with explain.c's ExplainTargetRel */ relname = get_rel_name(rte->relid); @@ -3480,8 +3654,17 @@ estimate_path_cost_size(PlannerInfo *root, /* For join we expect inner and outer relations set */ Assert(fpinfo->innerrel && fpinfo->outerrel); - fpinfo_i = (PgFdwRelationInfo *) fpinfo->innerrel->fdw_private; - fpinfo_o = (PgFdwRelationInfo *) fpinfo->outerrel->fdw_private; + /* + * For a FUNCTION RTE absorbed into the join, use the stub fpinfo + * we built in foreign_join_ok(), since the function rel itself + * has no fdw_private. + */ + fpinfo_i = fpinfo->inner_func_fpinfo ? + fpinfo->inner_func_fpinfo : + (PgFdwRelationInfo *) fpinfo->innerrel->fdw_private; + fpinfo_o = fpinfo->outer_func_fpinfo ? + fpinfo->outer_func_fpinfo : + (PgFdwRelationInfo *) fpinfo->outerrel->fdw_private; /* Estimate of number of rows in cross product */ nrows = fpinfo_i->rows * fpinfo_o->rows; @@ -6639,6 +6822,181 @@ semijoin_target_ok(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, return ok; } +/* + * get_base_relids + * Return the set of base relids referenced by a foreign scan rel. + * + * For an upper rel we use the all-query relids minus the outer joins; + * otherwise the rel's own relids minus the outer joins. The result matches + * the relids that create_foreignscan_plan() ultimately uses for + * ForeignScan.fs_base_relids, so it is suitable for any tagging we want to + * store via plan-private state. + */ +static Relids +get_base_relids(PlannerInfo *root, RelOptInfo *rel) +{ + Relids relids; + + if (rel->reloptkind == RELOPT_UPPER_REL) + relids = root->all_query_rels; + else + relids = rel->relids; + + return bms_difference(relids, root->outer_join_rels); +} + +/* + * get_min_base_rti + * Lowest base RT index in the foreign scan rel. + * + * After setrefs.c flattens the rtable, the scan-local indexes saved in + * plan-private data can be translated to estate indexes by adding + * (ForeignScan.fs_base_relids min - this value). Captured at plan time + * because create_foreignscan_plan() computes the same value internally. + */ +static int +get_min_base_rti(PlannerInfo *root, RelOptInfo *rel) +{ + Relids relids = get_base_relids(root, rel); + + return bms_next_member(relids, -1); +} + +/* + * get_functions_data + * Build the per-RTE function metadata list saved as + * FdwScanPrivateFunctions. + * + * The result list is indexed by base RT index relative to the lowest base + * RT index of the scan. Each element is either NULL (for non-RTE_FUNCTION + * base rels in this scan) or a List of List of three Integer nodes: + * (funcid, funcrettype, funccollation) -- one inner list per RangeTblFunction. + * + * Only RTE_FUNCTION relids actually appearing in the foreign scan's + * fs_base_relids contribute; others are placeholders so that the consumer + * can index into the result by RTI offset. + */ +static List * +get_functions_data(PlannerInfo *root, RelOptInfo *rel) +{ + List *rtfuncdata = NIL; + Relids fscan_relids = get_base_relids(root, rel); + int i; + + for (i = 0; i < root->simple_rel_array_size; i++) + { + RangeTblEntry *rte = root->simple_rte_array[i]; + List *funcdata = NIL; + ListCell *lc; + + if (rte == NULL || i == 0 || + !bms_is_member(i, fscan_relids) || + rte->rtekind != RTE_FUNCTION) + { + rtfuncdata = lappend(rtfuncdata, NULL); + continue; + } + + foreach(lc, rte->functions) + { + RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc); + Oid funcrettype; + Oid funccollation; + TupleDesc tupdesc; + Oid funcid = InvalidOid; + + get_expr_result_type(rtfunc->funcexpr, &funcrettype, &tupdesc); + + if (!OidIsValid(funcrettype) || funcrettype == RECORDOID) + elog(ERROR, + "could not determine return type for function in foreign scan"); + + funccollation = exprCollation(rtfunc->funcexpr); + + if (IsA(rtfunc->funcexpr, FuncExpr)) + funcid = ((FuncExpr *) rtfunc->funcexpr)->funcid; + + funcdata = lappend(funcdata, + list_make3(makeInteger(funcid), + makeInteger(funcrettype), + makeInteger(funccollation))); + } + + rtfuncdata = lappend(rtfuncdata, funcdata); + } + + return rtfuncdata; +} + +/* + * Check if a relation is a FUNCTION RTE that can be absorbed into a remote + * join. Every function in the RTE must + * + * - return a well-defined scalar type -- we don't ship records/composite + * since the remote server cannot reconstruct a column definition list + * and our deparser does not emit one; + * - have a shippable expression with no mutable subnodes -- is_foreign_expr() + * rejects volatile/stable functions through contain_mutable_functions(), + * so the IMMUTABLE-only restriction is implicit; + * - not contain SubPlans -- we'd otherwise need to ship sub-results to + * the remote, which we do not implement. + * + * WITH ORDINALITY is not supported yet. + */ +static bool +function_rte_pushdown_ok(PlannerInfo *root, RelOptInfo *rel, + RelOptInfo *fdwrel) +{ + RangeTblEntry *rte; + ListCell *lc; + + if (rel->rtekind != RTE_FUNCTION) + return false; + rte = planner_rt_fetch(rel->relid, root); + if (rte->rtekind != RTE_FUNCTION) + return false; + if (rte->funcordinality) + return false; + + /* + * Reject up-front any function RTE that lateral-references another + * relation: foreign-join push-down would need to parameterise the remote + * query per outer row, which we don't support, and even considering the + * path is expensive on the planner side. The surrounding lateral_relids + * check in postgresGetForeignJoinPaths() would normally bail out for the + * joinrel, but doing the check here avoids walking the function + * expression entirely. + */ + if (!bms_is_empty(rel->lateral_relids)) + return false; + + Assert(list_length(rte->functions) >= 1); + + foreach(lc, rte->functions) + { + RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc); + TypeFuncClass functypclass; + Oid funcrettype; + TupleDesc tupdesc; + + functypclass = get_expr_result_type(rtfunc->funcexpr, + &funcrettype, &tupdesc); + if (functypclass != TYPEFUNC_SCALAR) + return false; + if (!OidIsValid(funcrettype) || + funcrettype == RECORDOID || + funcrettype == VOIDOID) + return false; + + if (contain_subplans(rtfunc->funcexpr)) + return false; + if (!is_foreign_expr(root, fdwrel, (Expr *) rtfunc->funcexpr)) + return false; + } + + return true; +} + /* * Assess whether the join between inner and outer relations can be pushed down * to the foreign server. As a side effect, save information we obtain in this @@ -6654,6 +7012,8 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, PgFdwRelationInfo *fpinfo_i; ListCell *lc; List *joinclauses; + bool outer_is_function = false; + bool inner_is_function = false; /* * We support pushing down INNER, LEFT, RIGHT, FULL OUTER and SEMI joins. @@ -6672,15 +7032,70 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, return false; /* - * If either of the joining relations is marked as unsafe to pushdown, the - * join can not be pushed down. + * Detect mixed (foreign x function-RTE) cases. Only INNER joins are + * supported initially. We dispatch on rtekind here so that the same + * function RTE can be absorbed into joins on multiple foreign servers + * (each call gets its own stub fpinfo and rechecks shippability for the + * specific server). */ fpinfo = (PgFdwRelationInfo *) joinrel->fdw_private; - fpinfo_o = (PgFdwRelationInfo *) outerrel->fdw_private; - fpinfo_i = (PgFdwRelationInfo *) innerrel->fdw_private; - if (!fpinfo_o || !fpinfo_o->pushdown_safe || - !fpinfo_i || !fpinfo_i->pushdown_safe) - return false; + if (jointype == JOIN_INNER && innerrel->rtekind == RTE_FUNCTION && + (fpinfo_o = (PgFdwRelationInfo *) outerrel->fdw_private) && + fpinfo_o->pushdown_safe && + function_rte_pushdown_ok(root, innerrel, outerrel)) + { + inner_is_function = true; + } + else if (jointype == JOIN_INNER && outerrel->rtekind == RTE_FUNCTION && + (fpinfo_i = (PgFdwRelationInfo *) innerrel->fdw_private) && + fpinfo_i->pushdown_safe && + function_rte_pushdown_ok(root, outerrel, innerrel)) + { + outer_is_function = true; + } + else + { + fpinfo_o = (PgFdwRelationInfo *) outerrel->fdw_private; + fpinfo_i = (PgFdwRelationInfo *) innerrel->fdw_private; + if (!fpinfo_o || !fpinfo_o->pushdown_safe || + !fpinfo_i || !fpinfo_i->pushdown_safe) + return false; + } + + /* + * If one side is a function RTE, allocate a stub fpinfo so the rest of + * this function and the cost estimator can treat it uniformly. We hand + * the stub to the joinrel's deparser via the same path the foreign side + * uses, but we never permanently attach it to the function rel's + * fdw_private (different joinrels may pair the same function RTE with + * different foreign servers). + */ + if (inner_is_function) + { + fpinfo_i = palloc0_object(PgFdwRelationInfo); + fpinfo_i->pushdown_safe = true; + fpinfo_i->server = fpinfo_o->server; + fpinfo_i->relation_name = psprintf("%u", innerrel->relid); + fpinfo_i->rows = innerrel->rows; + fpinfo_i->width = innerrel->reltarget->width; + fpinfo_i->retrieved_rows = innerrel->rows; + fpinfo_i->rel_startup_cost = 0; + fpinfo_i->rel_total_cost = 0; + fpinfo->inner_func_fpinfo = fpinfo_i; + } + else if (outer_is_function) + { + fpinfo_o = palloc0_object(PgFdwRelationInfo); + fpinfo_o->pushdown_safe = true; + fpinfo_o->server = fpinfo_i->server; + fpinfo_o->relation_name = psprintf("%u", outerrel->relid); + fpinfo_o->rows = outerrel->rows; + fpinfo_o->width = outerrel->reltarget->width; + fpinfo_o->retrieved_rows = outerrel->rows; + fpinfo_o->rel_startup_cost = 0; + fpinfo_o->rel_total_cost = 0; + fpinfo->outer_func_fpinfo = fpinfo_o; + } /* * If joining relations have local conditions, those conditions are diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h index a2bb1ff352c..5b2ffcf06f7 100644 --- a/contrib/postgres_fdw/postgres_fdw.h +++ b/contrib/postgres_fdw/postgres_fdw.h @@ -106,6 +106,16 @@ typedef struct PgFdwRelationInfo /* joinclauses contains only JOIN/ON conditions for an outer join */ List *joinclauses; /* List of RestrictInfo */ + /* + * If a FUNCTION RTE was absorbed into this join, these point at the stub + * PgFdwRelationInfo for the function side (paired with the + * outerrel/innerrel), so the cost estimator and deparser can find it + * without consulting the function rel's fdw_private. At most one of + * outer_func_fpinfo/inner_func_fpinfo is set. + */ + struct PgFdwRelationInfo *outer_func_fpinfo; + struct PgFdwRelationInfo *inner_func_fpinfo; + /* Upper relation information */ UpperRelationKind stage; diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index dfc58beb0d2..ec791e2127d 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -790,6 +790,189 @@ SELECT t1.c1, t2.c2 FROM v4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c SELECT t1.c1, t2.c2 FROM v4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; ALTER VIEW v4 OWNER TO regress_view_owner; +-- =================================================================== +-- Foreign-join with FUNCTION RTE pushdown (IMMUTABLE functions only) +-- =================================================================== +-- IMMUTABLE function: unnest of constant array can be pushed down +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, t1.c3 FROM ft1 t1, unnest(ARRAY[1, 5, 10, 100]::int[]) AS u(id) +WHERE t1.c1 = u.id ORDER BY t1.c1; +SELECT t1.c1, t1.c3 FROM ft1 t1, unnest(ARRAY[1, 5, 10, 100]::int[]) AS u(id) +WHERE t1.c1 = u.id ORDER BY t1.c1; + +-- IMMUTABLE function: generate_series with constant args +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1 FROM ft1 t1, generate_series(1, 4) AS g(id) +WHERE t1.c1 = g.id ORDER BY t1.c1; +SELECT t1.c1 FROM ft1 t1, generate_series(1, 4) AS g(id) +WHERE t1.c1 = g.id ORDER BY t1.c1; + +-- VOLATILE function (random) must NOT be pushed down +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1 FROM ft1 t1, generate_series(1, 4 + (random() * 0)::int) AS g(id) +WHERE t1.c1 = g.id; + +-- WITH ORDINALITY must NOT be pushed down (limitation of this implementation) +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, u.ord +FROM ft1 t1, unnest(ARRAY[1, 5, 10]::int[]) WITH ORDINALITY AS u(id, ord) +WHERE t1.c1 = u.id; + +-- Same function RTE joined with two different foreign servers: planner picks +-- one absorption + a local join with the second foreign server. The fact +-- that the function is IMMUTABLE makes this safe even if both sides chose to +-- absorb it independently. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, t2.c1 +FROM ft1 t1, ft6 t2, unnest(ARRAY[1, 5, 10, 100]::int[]) AS u(id) +WHERE t1.c1 = u.id AND t2.c1 = u.id ORDER BY t1.c1; + +-- Cost-based selection between two foreign servers: ft1 ("S 1"."T 1") has +-- 1000 rows, ft6 ("S 1"."T 4") has ~33 rows. The same query shape gets a +-- different push-down target depending on a predicate that changes the +-- effective cardinality of one side -- the function "jumps" to whichever +-- foreign scan benefits more from being pre-filtered. +ANALYZE ft1; +ANALYZE ft6; +-- No extra predicate: ft1 is the bigger side, function absorbed there. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, t2.c1 +FROM ft1 t1, ft6 t2, unnest(ARRAY[3, 6, 9, 12, 15, 18]::int[]) AS u(id) +WHERE t1.c1 = u.id AND t2.c1 = u.id; +-- Selective predicate on ft1.c3 (not in the eqclass) shrinks ft1 to a +-- handful of remote rows; now ft6 is effectively the bigger side and the +-- function is absorbed into ft6 instead. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, t2.c1 +FROM ft1 t1, ft6 t2, unnest(ARRAY[3, 6, 9, 12, 15, 18]::int[]) AS u(id) +WHERE t1.c1 = u.id AND t2.c1 = u.id AND t1.c3 < '00010'; + +-- The remaining scenarios reuse a dedicated foreign table to cover the +-- corner cases of FUNCTION RTE push-down: function-first FROM, record +-- return type rejection, whole-row reference, UPDATE...FROM..., and +-- multi-function ROWS FROM. +CREATE TABLE base_tbl_fn (a int, b int); +INSERT INTO base_tbl_fn + SELECT g, g + 100 FROM generate_series(1, 30) g; +CREATE FOREIGN TABLE remote_tbl (a int, b int) + SERVER loopback OPTIONS (table_name 'base_tbl_fn'); + +-- The function RTE appears first in FROM; the foreign relation must be +-- found by scanning fs_base_relids for an RTE_RELATION rather than +-- using scan->fs_relid blindly. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM unnest(array[2, 3, 4]) n, remote_tbl r WHERE r.a = n; +SELECT * FROM unnest(array[2, 3, 4]) n, remote_tbl r +WHERE r.a = n ORDER BY r.a; + +-- A function returning record (composite) is forbidden; pushing it +-- would yield a remote "column definition list is required" error. +-- function_rte_pushdown_ok() rejects it via TYPEFUNC_SCALAR. +CREATE OR REPLACE FUNCTION f_ret_record() RETURNS record AS $$ + SELECT (1, 2)::record +$$ LANGUAGE SQL IMMUTABLE; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s FROM remote_tbl rt, f_ret_record() AS s(a int, b int) +WHERE s.a = rt.a; +DROP FUNCTION f_ret_record(); + +-- UPDATE ... FROM unnest() with a complex element type; area(box) +-- yields the value to match. The locking machinery for the +-- non-relation RTE generates a whole-row Var that deparseColumnRef +-- emits as ROW(f.c1, ...). +UPDATE remote_tbl r SET b = 999 +FROM unnest(array[box '((2,3),(-2,-3))']) AS t(bx) +WHERE r.a = area(t.bx); +SELECT * FROM remote_tbl WHERE a IN (23, 24, 25) ORDER BY a; + +-- The same shape with CASE and RETURNING; exercises the DirectModify +-- path and the executor's tuple-desc reconstruction. +UPDATE remote_tbl r + SET b = CASE WHEN random() >= 0 THEN 5 ELSE 0 END + FROM unnest(array[box '((2,3),(-2,-3))']) AS t(bx) + WHERE r.a = area(t.bx) RETURNING a, b; + +-- RETURNING a whole-row reference to the function RTE. Without the +-- per-RTE function metadata being preserved on the DirectModify path +-- (FdwDirectModifyPrivateFunctions / FdwDirectModifyPrivateMinRTIndex), +-- this would fail with "input of anonymous composite types is not +-- implemented". +UPDATE remote_tbl r SET b = 7 + FROM unnest(array[box '((2,3),(-2,-3))'], + array[int '1']) AS t(bx, i) + WHERE r.a = area(t.bx) RETURNING a, b, t; + +-- ROWS FROM (...) with several functions. Force pushdown because the +-- cost model otherwise picks a local plan. +SET enable_hashjoin = off; +SET enable_mergejoin = off; +SET enable_nestloop = off; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT r.a, t.n, t.s + FROM remote_tbl r, ROWS FROM (unnest(array[3, 6, 9]), + generate_series(11, 13)) AS t(n, s) + WHERE r.a = t.n ORDER BY r.a; +SELECT r.a, t.n, t.s + FROM remote_tbl r, ROWS FROM (unnest(array[3, 6, 9]), + generate_series(11, 13)) AS t(n, s) + WHERE r.a = t.n ORDER BY r.a; + +-- Whole-row Var on the absorbed function side (e.g. via a cast to +-- text). Exercises both deparseColumnRef whole-row branch and the +-- get_tupdesc_for_join_scan_tuples() RTE_FUNCTION metadata path. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t::text, r.a + FROM remote_tbl r, ROWS FROM (unnest(array[3, 6, 9]), + generate_series(11, 13)) AS t(n, s) + WHERE r.a = t.n ORDER BY r.a; +SELECT t::text, r.a + FROM remote_tbl r, ROWS FROM (unnest(array[3, 6, 9]), + generate_series(11, 13)) AS t(n, s) + WHERE r.a = t.n ORDER BY r.a; + +-- Check whole-row Var represented by function on the nullable +-- left join side +EXPLAIN (VERBOSE, COSTS OFF) +SELECT u, ft5.c1, ft5.c2, ft4.c1, ft4.c2 FROM ft5 LEFT JOIN +(ft4 JOIN unnest(array[1,2], array[3,4]) as u (u1, u2) ON ft4.c1=u1) +ON ft5.c1 = ft4.c1 WHERE ft5.c1 < 10 ORDER BY ft5.c1; +SELECT u, ft5.c1, ft5.c2, ft4.c1, ft4.c2 FROM ft5 LEFT JOIN +(ft4 JOIN unnest(array[1,2], array[3,4]) as u (u1, u2) ON ft4.c1=u1) +ON ft5.c1 = ft4.c1 WHERE ft5.c1 < 10 ORDER BY ft5.c1; +RESET enable_hashjoin; +RESET enable_mergejoin; +RESET enable_nestloop; + +-- Volatile function in ROWS FROM disqualifies the whole RTE. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT r.a FROM remote_tbl r, + ROWS FROM (unnest(array[3, 6, 9]), + generate_series(1, 4 + (random() * 0)::int)) AS t(n, s) + WHERE r.a = t.n; + +-- LATERAL function referencing a foreign Var: postgres_fdw's +-- foreign-join pushdown rejects this via the joinrel->lateral_relids +-- check; the plan is therefore a local NestLoop + FunctionScan. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT r.a, t.x FROM remote_tbl r, LATERAL unnest(array[r.a]) AS t(x) +WHERE r.a <= 3 ORDER BY r.a; + +-- Outer joins with the function RTE are not pushed down (only INNER +-- joins are supported by function_rte_pushdown_ok()). +EXPLAIN (VERBOSE, COSTS OFF) +SELECT r.a, t.n FROM remote_tbl r + LEFT JOIN unnest(array[1, 2, 3]) AS t(n) ON r.a = t.n + WHERE r.a <= 5 ORDER BY r.a; + +-- SEMI join (EXISTS) with a function RTE is also not pushed down. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT r.a FROM remote_tbl r + WHERE EXISTS (SELECT 1 FROM unnest(array[3, 6, 9]) AS t(n) WHERE t.n = r.a) + ORDER BY r.a; + +DROP FOREIGN TABLE remote_tbl; +DROP TABLE base_tbl_fn; + -- ==================================================================== -- Check that userid to use when querying the remote table is correctly -- propagated into foreign rels present in subqueries under an UNION ALL diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 3fc2c2f71d0..f21ae1baeb2 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -746,6 +746,30 @@ set_foreign_rel_properties(RelOptInfo *joinrel, RelOptInfo *outer_rel, joinrel->fdwroutine = outer_rel->fdwroutine; } } + else if (OidIsValid(outer_rel->serverid) && + inner_rel->rtekind == RTE_FUNCTION) + { + /* + * One side is a foreign relation, the other side is a function RTE. + * If the function is IMMUTABLE, the FDW can absorb the function call + * into the remote query (the result is identical regardless of which + * server evaluates it). Let the FDW decide whether the join is + * actually shippable; here we just propagate the FDW routine so the + * FDW gets a chance. + */ + joinrel->serverid = outer_rel->serverid; + joinrel->userid = outer_rel->userid; + joinrel->useridiscurrent = outer_rel->useridiscurrent; + joinrel->fdwroutine = outer_rel->fdwroutine; + } + else if (OidIsValid(inner_rel->serverid) && + outer_rel->rtekind == RTE_FUNCTION) + { + joinrel->serverid = inner_rel->serverid; + joinrel->userid = inner_rel->userid; + joinrel->useridiscurrent = inner_rel->useridiscurrent; + joinrel->fdwroutine = inner_rel->fdwroutine; + } } /* -- 2.39.5 (Apple Git-154)