From ab356c4f081417ef69517e6ef8449db143ef29e1 Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Thu, 2 Apr 2026 10:54:30 +0900 Subject: [PATCH 8/8] Add JIT compilation support for RPR PREV/NEXT navigation --- src/backend/jit/llvm/llvmjit_expr.c | 72 +++++++++++++++++++++-------- src/test/regress/expected/rpr.out | 31 +++++++++++++ src/test/regress/sql/rpr.sql | 27 +++++++++++ 3 files changed, 111 insertions(+), 19 deletions(-) diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c index d158e37e7b5..4901b2a7ff4 100644 --- a/src/backend/jit/llvm/llvmjit_expr.c +++ b/src/backend/jit/llvm/llvmjit_expr.c @@ -127,6 +127,9 @@ llvm_compile_expr(ExprState *state) LLVMValueRef v_aggvalues; LLVMValueRef v_aggnulls; + /* RPR navigation: when true, EEOP_OUTER_VAR reloads from econtext */ + bool has_rpr_nav; + instr_time starttime; instr_time deform_starttime; instr_time endtime; @@ -300,19 +303,16 @@ llvm_compile_expr(ExprState *state) * RPR navigation opcodes (PREV/NEXT) swap ecxt_outertuple to a different * row mid-expression. The JIT code loads v_outervalues and v_outernulls * once in the entry block and reuses them for all EEOP_OUTER_VAR steps. - * After a slot swap, these pointers become stale because the new slot has - * its own tts_values/tts_isnull arrays. Fall back to the interpreter for - * these expressions. + * After a slot swap, these cached pointers become stale because the new + * slot has its own tts_values/tts_isnull arrays. * - * XXX To JIT-compile these expressions properly, the NAV_SET and - * NAV_RESTORE handlers would need to reload the tts_values and tts_isnull - * pointers from the new slot. However, LLVM uses SSA (Static Single - * Assignment) form where each value is defined exactly once. When - * different basic blocks produce different values for the same pointer, - * LLVM requires PHI nodes at the merge point to select the correct one. 
- * Without that plumbing, OUTER_VAR steps after a slot swap would read - * from the wrong pointer. + * When RPR navigation opcodes are present, EEOP_OUTER_VAR reloads the + * slot pointer from econtext->ecxt_outertuple on every access instead of + * using the cached entry-block values. This avoids the SSA/PHI + * complexity while keeping the rest of the expression JIT-compiled. + * Expressions without RPR navigation use the cached values as before. */ + has_rpr_nav = false; if (parent && IsA(parent, WindowAggState) && ((WindowAgg *) parent->plan)->rpPattern != NULL) { @@ -323,9 +323,8 @@ llvm_compile_expr(ExprState *state) if (opcode == EEOP_RPR_NAV_SET || opcode == EEOP_RPR_NAV_RESTORE) { - LLVMDeleteFunction(eval_fn); - LLVMDisposeBuilder(b); - return false; + has_rpr_nav = true; + break; } } } @@ -492,8 +491,37 @@ llvm_compile_expr(ExprState *state) } else if (opcode == EEOP_OUTER_VAR) { - v_values = v_outervalues; - v_nulls = v_outernulls; + if (has_rpr_nav) + { + /* + * RPR navigation swaps ecxt_outertuple + * mid-expression. Reload slot pointer from + * econtext on every access so we read from the + * current (possibly swapped) slot. 
+ */ + LLVMValueRef v_tmpslot; + + v_tmpslot = l_load_struct_gep(b, + StructExprContext, + v_econtext, + FIELDNO_EXPRCONTEXT_OUTERTUPLE, + "v_outerslot_reload"); + v_values = l_load_struct_gep(b, + StructTupleTableSlot, + v_tmpslot, + FIELDNO_TUPLETABLESLOT_VALUES, + "v_outervalues_reload"); + v_nulls = l_load_struct_gep(b, + StructTupleTableSlot, + v_tmpslot, + FIELDNO_TUPLETABLESLOT_ISNULL, + "v_outernulls_reload"); + } + else + { + v_values = v_outervalues; + v_nulls = v_outernulls; + } } else if (opcode == EEOP_SCAN_VAR) { @@ -2467,10 +2495,16 @@ llvm_compile_expr(ExprState *state) break; case EEOP_RPR_NAV_SET: + build_EvalXFunc(b, mod, "ExecEvalRPRNavSet", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + case EEOP_RPR_NAV_RESTORE: - /* unreachable: filtered out by the pre-scan above */ - Assert(false); - return false; + build_EvalXFunc(b, mod, "ExecEvalRPRNavRestore", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; case EEOP_AGG_STRICT_DESERIALIZE: case EEOP_AGG_DESERIALIZE: diff --git a/src/test/regress/expected/rpr.out b/src/test/regress/expected/rpr.out index b005c7e07a1..541e6bf8f98 100644 --- a/src/test/regress/expected/rpr.out +++ b/src/test/regress/expected/rpr.out @@ -2153,6 +2153,37 @@ SELECT match_first, match_last, match_len FROM result WHERE match_len > 0; 0 | 99998 | 99999 (1 row) +-- JIT PREV/NEXT navigation test: 100K rows with PREV in DEFINE. +-- Exercises EEOP_RPR_NAV_SET/RESTORE JIT code paths (has_rpr_nav reload) +-- at scale. Single V-shape: price falls until i = 50000, then rises.
+SET jit_above_cost = 0; +WITH data AS ( + SELECT i, abs(50000 - i) AS price + FROM generate_series(1, 100000) i +), +result AS ( + SELECT i, price, + count(*) OVER w AS match_len, + first_value(price) OVER w AS match_first + FROM data + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (DOWN+ UP+) + DEFINE + DOWN AS price < PREV(price), + UP AS price > PREV(price) + ) +) +SELECT count(*) AS matched_rows, max(match_len) AS longest_match +FROM result WHERE match_len > 0; + matched_rows | longest_match +--------------+--------------- + 1 | 99999 +(1 row) + +RESET jit_above_cost; -- -- Subquery wrapping: RPR window inside outer aggregate. -- Tests that WindowAgg is not removed by remove_unused_subquery_outputs() diff --git a/src/test/regress/sql/rpr.sql b/src/test/regress/sql/rpr.sql index 49dac932d96..cc8daae481e 100644 --- a/src/test/regress/sql/rpr.sql +++ b/src/test/regress/sql/rpr.sql @@ -1084,6 +1084,33 @@ result AS ( -- Should match: A (33333 rows) + B (33333 rows) + C (33333 rows) = 99999 rows SELECT match_first, match_last, match_len FROM result WHERE match_len > 0; +-- JIT PREV/NEXT navigation test: 100K rows with PREV in DEFINE. +-- Exercises EEOP_RPR_NAV_SET/RESTORE JIT code paths (has_rpr_nav reload) +-- at scale. Single V-shape: price falls until i = 50000, then rises. +SET jit_above_cost = 0; +WITH data AS ( + SELECT i, abs(50000 - i) AS price + FROM generate_series(1, 100000) i +), +result AS ( + SELECT i, price, + count(*) OVER w AS match_len, + first_value(price) OVER w AS match_first + FROM data + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (DOWN+ UP+) + DEFINE + DOWN AS price < PREV(price), + UP AS price > PREV(price) + ) +) +SELECT count(*) AS matched_rows, max(match_len) AS longest_match +FROM result WHERE match_len > 0; +RESET jit_above_cost; + -- -- Subquery wrapping: RPR window inside outer aggregate.
-- Tests that WindowAgg is not removed by remove_unused_subquery_outputs() -- 2.50.1 (Apple Git-155)