diff --git a/doc/src/sgml/rangetypes.sgml b/doc/src/sgml/rangetypes.sgml index 3a034d9..696a261 100644 --- a/doc/src/sgml/rangetypes.sgml +++ b/doc/src/sgml/rangetypes.sgml @@ -522,4 +522,118 @@ INSERT 0 1 + + Range Join + + + range type + join + + + + A Range Join is a join where one of the join conditions is the range + overlaps operator (see ). In other + words, rather than returning rows where corresponding fields are equal, it + returns rows where the corresponding fields overlap. + + + For instance, to find users who were active on a website at the same time + as they were using a mobile app, you might issue a query like: + +CREATE TABLE web_session( + web_session_id text primary key, + username text, + during tstzrange); +CREATE TABLE app_session( + app_session_id text primary key, + username text, + during tstzrange); +INSERT INTO web_session VALUES + ('0cc175b9c0f1b6a8', 'user1', '[2017-02-01 09:30, 2017-02-01 10:45)'), + ('92eb5ffee6ae2fec', 'user2', '[2017-02-01 09:30, 2017-02-01 10:45)'), + ('4a8a08f09d37b737', 'user3', '[2017-02-01 09:30, 2017-02-01 10:45)'); +INSERT INTO app_session VALUES + ('8277e0910d750195', 'user1', '[2017-02-01 10:30, 2017-02-01 11:45)'), + ('b448797616e091ad', 'user3', '[2017-02-01 09:00, 2017-02-01 11:00)'), + ('95649038408b5f33', 'user4', '[2017-02-01 09:30, 2017-02-01 10:45)'); + +SELECT w.username, + w.during * a.during AS both_during + FROM web_session w, app_session a + WHERE w.username = a.username + AND w.during && a.during; + username | both_during +----------+----------------------------------------------------- + user1 | ["2017-02-01 10:30:00-08","2017-02-01 10:45:00-08") + user3 | ["2017-02-01 09:30:00-08","2017-02-01 10:45:00-08") +(2 rows) + + + + In addition to the general execution strategies available, Postgres also + has special support for a variant of Merge Join called Range Merge Join, + which offers much better performance in most cases: + +EXPLAIN (costs off) SELECT w.username, + w.during * a.during AS both_during + FROM web_session w, app_session a + WHERE w.username = a.username + AND w.during && a.during; + QUERY PLAN +---------------------------------------------------------------------- + Range Merge Join + Merge Cond: ((w.during && a.during) AND (w.username = a.username)) + -> Sort + Sort Key: w.during, w.username + -> Seq Scan on web_session w + -> Sort + Sort Key: a.during, a.username + -> Seq Scan on app_session a +(8 rows) + + + + If the base table does not use range types, you can still use Range Join + by constructing the appropriate range types in the query itself: + +CREATE TABLE web_session( + web_session_id text primary key, + username text, + start_time timestamptz, + end_time timestamptz); +CREATE TABLE app_session( + app_session_id text primary key, + username text, + start_time timestamptz, + end_time timestamptz); +INSERT INTO web_session VALUES + ('0cc175b9c0f1b6a8', 'user1', '2017-02-01 09:30', '2017-02-01 10:45'), + ('92eb5ffee6ae2fec', 'user2', '2017-02-01 09:30', '2017-02-01 10:45'), + ('4a8a08f09d37b737', 'user3', '2017-02-01 09:30', '2017-02-01 10:45'); +INSERT INTO app_session VALUES + ('8277e0910d750195', 'user1', '2017-02-01 10:30', '2017-02-01 11:45'), + ('b448797616e091ad', 'user3', '2017-02-01 09:00', '2017-02-01 11:00'), + ('95649038408b5f33', 'user4', '2017-02-01 09:30', '2017-02-01 10:45'); + +EXPLAIN (costs off) SELECT * + FROM web_session w, app_session a + WHERE w.username = a.username + AND tstzrange(w.start_time,w.end_time) && tstzrange(a.start_time,a.end_time); + QUERY PLAN + +------------------------------------------------------------------------------------------ +-------------------------------------- + Range Merge Join + Merge Cond: (((tstzrange(w.start_time, w.end_time)) && (tstzrange(a.start_time, a.end_t +ime))) AND (w.username = a.username)) + -> Sort + Sort Key: (tstzrange(w.start_time, w.end_time)), w.username + -> Seq Scan on web_session w + -> Sort + Sort Key: (tstzrange(a.start_time, a.end_time)), a.username + -> Seq Scan on app_session a +(8 rows) + + + diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 79e6985..f3fcc80 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -917,7 +917,11 @@ ExplainNode(PlanState *planstate, List *ancestors, pname = sname = "Nested Loop"; break; case T_MergeJoin: - pname = "Merge"; /* "Join" gets added by jointype switch */ + if (((MergeJoin*)plan)->mergeRangeJoin) + pname = "Range Merge"; /* "Join" gets added by jointype switch */ + else + pname = "Merge"; /* "Join" gets added by jointype switch */ + sname = "Merge Join"; break; case T_HashJoin: diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index b52946f..4765a32 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -89,15 +89,67 @@ * proceed to another state. This state is stored in the node's * execution state information and is preserved across calls to * ExecMergeJoin. -cim 10/31/89 + * + * RANGE MERGE JOIN + * + * Range Merge Join is a generalization of merge join. When a join + * predicate involves the range overlaps operator (&&), a merge join + * becomes a range join. + * + * The input ranges must be ordered by (lower-bound, upper-bound), which + * is the ordinary range_ops operator class. MJCompare must not simply + * use the operator class's comparison semantics though; instead it + * returns: + * + * - MJCMP_MATCHES if outer range overlaps inner range + * - MJCMP_LEFTOF if outer range is strictly left-of inner range + * - MJCMP_RIGHTOF if outer range is strictly right-of inner range + * + * NB: the above is a simplification considering only a single merge + * clause. When there are multiple merge clauses, it's possible that one + * tuple is neither right-of, nor left-of, nor matching. For instance, if + * an earlier range merge clause matches (overlaps), but a later clause + * fails. In that case, MJCompare returns MJCMP_NOSKIP. + * + * If MJCompare returns MJCMP_RIGHTOF, later or earlier tuples on the + * inner side may match. For example: + * + * OUTER INNER + * ... [1,9] matches current outer + * [4,5] [2,3] no match + * ... [3,5] matches current outer + * ... [7,9] no match and no later matches for current outer + * + * Outer tuple [4,5] does not match [2,3], but it matches (overlaps with) + * the earlier tuple [1,9] and the later tuple [3,5]. But once we + * encounter [7,9], we know that no later inner tuple can possibly match + * the outer. + * + * When doing a range join, we lose two merge join optimizations: + * + * 1. Ordinary merge join only restores to the mark if it's equal to the + * new outer. For range join, we must always restore to the mark + * because there may be matches after the mark and before the current + * inner tuple. + * 2. After restoring to the mark, ordinary merge join immediately moves + * to JOINTUPLES. Range join must move to SKIP_TEST first. + * + * Range merge join is unable to implement RIGHT/FULL joins. It's also + * unable to cope with reverse sort order, because there could always be + * some later inner range that matches the outer tuple. */ #include "postgres.h" #include "access/nbtree.h" +#include "catalog/pg_operator.h" #include "executor/execdebug.h" #include "executor/nodeMergejoin.h" #include "miscadmin.h" +#include "nodes/nodeFuncs.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/rangetypes.h" +#include "utils/typcache.h" /* @@ -138,6 +190,10 @@ typedef struct MergeJoinClauseData * stored here. */ SortSupportData ssup; + + /* needed for Range Join */ + bool isrange; + TypeCacheEntry *range_typcache; } MergeJoinClauseData; /* Result type for MJEvalOuterValues and MJEvalInnerValues */ @@ -148,6 +204,13 @@ typedef enum MJEVAL_ENDOFJOIN /* end of input (physical or effective) */ } MJEvalResult; +typedef enum +{ + MJCMP_LEFTOF, + MJCMP_RIGHTOF, + MJCMP_MATCHES, + MJCMP_NOSKIP +} MJCompareResult; #define MarkInnerTuple(innerTupleSlot, mergestate) \ ExecCopySlot((mergestate)->mj_MarkedTupleSlot, (innerTupleSlot)) @@ -178,6 +241,7 @@ MJExamineQuals(List *mergeclauses, Oid *mergecollations, int *mergestrategies, bool *mergenullsfirst, + bool *israngejoin, PlanState *parent) { MergeJoinClause clauses; @@ -185,6 +249,8 @@ MJExamineQuals(List *mergeclauses, int iClause; ListCell *cl; + *israngejoin = false; + clauses = (MergeJoinClause) palloc0(nClauses * sizeof(MergeJoinClauseData)); iClause = 0; @@ -196,6 +262,7 @@ MJExamineQuals(List *mergeclauses, Oid collation = mergecollations[iClause]; StrategyNumber opstrategy = mergestrategies[iClause]; bool nulls_first = mergenullsfirst[iClause]; + Oid eq_opno; int op_strategy; Oid op_lefttype; Oid op_righttype; @@ -221,8 +288,32 @@ MJExamineQuals(List *mergeclauses, elog(ERROR, "unsupported mergejoin strategy %d", opstrategy); clause->ssup.ssup_nulls_first = nulls_first; + /* + * If a range join, the original operator must be "&&" rather than + * "=". Set eq_opno to the range equality operator for the purposes of + * setting up the merge clause, but mark it as a range. + */ + if (qual->opno == OID_RANGE_OVERLAP_OP) + { + Oid rngtypid = exprType((Node*)clause->lexpr->expr); + Assert(exprType((Node*)clause->lexpr->expr) == + exprType((Node*)clause->rexpr->expr)); + eq_opno = OID_RANGE_EQ_OP; + clause->isrange = true; + clause->range_typcache = lookup_type_cache(rngtypid, + TYPECACHE_RANGE_INFO); + *israngejoin = true; + } + else + { + eq_opno = qual->opno; + clause->isrange = false; + clause->range_typcache = NULL; + } + + /* Extract the operator's declared left/right datatypes */ - get_op_opfamily_properties(qual->opno, opfamily, false, + get_op_opfamily_properties(eq_opno, opfamily, false, &op_strategy, &op_lefttype, &op_righttype); @@ -324,6 +415,11 @@ MJEvalOuterValues(MergeJoinState *mergestate) else if (result == MJEVAL_MATCHABLE) result = MJEVAL_NONMATCHABLE; } + else if (clause->isrange) + { + if (RangeIsEmpty(DatumGetRangeTypeP(clause->ldatum))) + result = MJEVAL_NONMATCHABLE; + } } MemoryContextSwitchTo(oldContext); @@ -371,6 +467,11 @@ MJEvalInnerValues(MergeJoinState *mergestate, TupleTableSlot *innerslot) else if (result == MJEVAL_MATCHABLE) result = MJEVAL_NONMATCHABLE; } + else if (clause->isrange) + { + if (RangeIsEmpty(DatumGetRangeTypeP(clause->rdatum))) + result = MJEVAL_NONMATCHABLE; + } } MemoryContextSwitchTo(oldContext); @@ -379,6 +480,34 @@ MJEvalInnerValues(MergeJoinState *mergestate, TupleTableSlot *innerslot) } /* + * Return 0 if ranges overlap, <0 if the first range is left-of the second, or + * >0 if the first range is right-of the second. See comment at the top of the + * file for explanation. + */ +static int +ApplyRangeMatch(Datum ldatum, bool lisnull, Datum rdatum, bool risnull, + SortSupport ssup, TypeCacheEntry *typcache) +{ + /* can't handle reverse sort order; should be prevented by optimizer */ + Assert(!ssup->ssup_reverse); + Assert(!lisnull || !risnull); + + if (lisnull) + return ssup->ssup_nulls_first ? -1 : 1; + if (risnull) + return ssup->ssup_nulls_first ? 1 : -1; + + if (range_before_internal(typcache, DatumGetRangeTypeP(ldatum), + DatumGetRangeTypeP(rdatum))) + return -1; + else if (range_overlaps_internal(typcache, DatumGetRangeTypeP(ldatum), + DatumGetRangeTypeP(rdatum))) + return 0; + else + return 1; +} + +/* * MJCompare * * Compare the mergejoinable values of the current two input tuples @@ -388,11 +517,12 @@ MJEvalInnerValues(MergeJoinState *mergestate, TupleTableSlot *innerslot) * MJEvalOuterValues and MJEvalInnerValues must already have been called * for the current outer and inner tuples, respectively. */ -static int +static MJCompareResult MJCompare(MergeJoinState *mergestate) { int result = 0; bool nulleqnull = false; + bool range_overlaps = false; ExprContext *econtext = mergestate->js.ps.ps_ExprContext; int i; MemoryContext oldContext; @@ -418,14 +548,25 @@ MJCompare(MergeJoinState *mergestate) continue; } - result = ApplySortComparator(clause->ldatum, clause->lisnull, + if (clause->isrange) + { + result = ApplyRangeMatch(clause->ldatum, clause->lisnull, clause->rdatum, clause->risnull, - &clause->ssup); + &clause->ssup, clause->range_typcache); + if (result == 0) + range_overlaps = true; + } + else + result = ApplySortComparator(clause->ldatum, clause->lisnull, + clause->rdatum, clause->risnull, + &clause->ssup); if (result != 0) break; } + MemoryContextSwitchTo(oldContext); + /* * If we had any NULL-vs-NULL inputs, we do not want to report that the * tuples are equal. Instead, if result is still 0, change it to +1. This @@ -437,11 +578,22 @@ MJCompare(MergeJoinState *mergestate) */ if (result == 0 && (nulleqnull || mergestate->mj_ConstFalseJoin)) - result = 1; + return MJCMP_RIGHTOF; - MemoryContextSwitchTo(oldContext); + /* + * If a range predicate succeeds (overlaps) but a later predicate fails, + * it's neither strictly right-of, nor strictly left-of, nor matching. So + * we return MJCMP_NOSKIP. + */ + if (result != 0 && range_overlaps) + return MJCMP_NOSKIP; - return result; + if (result == 0) + return MJCMP_MATCHES; + else if (result < 0) + return MJCMP_LEFTOF; + else + return MJCMP_RIGHTOF; } @@ -533,7 +685,6 @@ check_constant_qual(List *qual, bool *is_const_false) return true; } - /* ---------------------------------------------------------------- * ExecMergeTupleDump * @@ -891,12 +1042,21 @@ ExecMergeJoin(PlanState *pstate) compareResult = MJCompare(node); MJ_DEBUG_COMPARE(compareResult); - if (compareResult == 0) + if (compareResult == MJCMP_MATCHES) node->mj_JoinState = EXEC_MJ_JOINTUPLES; + else if (compareResult == MJCMP_LEFTOF) + node->mj_JoinState = EXEC_MJ_NEXTOUTER; else { - Assert(compareResult < 0); - node->mj_JoinState = EXEC_MJ_NEXTOUTER; + /* + * This state is only reached during a range join, + * where earlier tuples may match, the current + * tuple may not match, but a later tuple in the + * inner may still match. See comment at the top + * of the file. + */ + Assert(((MergeJoin*)node->js.ps.plan)->mergeRangeJoin); + node->mj_JoinState = EXEC_MJ_NEXTINNER; } break; case MJEVAL_NONMATCHABLE: @@ -1038,17 +1198,33 @@ ExecMergeJoin(PlanState *pstate) MJ_printf("ExecMergeJoin: EXEC_MJ_TESTOUTER\n"); /* - * Here we must compare the outer tuple with the marked inner - * tuple. (We can ignore the result of MJEvalInnerValues, - * since the marked inner tuple is certainly matchable.) + * We can ignore the result of MJEvalInnerValues, since the + * marked inner tuple is certainly matchable. */ innerTupleSlot = node->mj_MarkedTupleSlot; (void) MJEvalInnerValues(node, innerTupleSlot); + if (((MergeJoin*)node->js.ps.plan)->mergeRangeJoin) + { + /* + * For range join, we must always restore to the mark, and + * then move to SKIP_TEST. + */ + ExecRestrPos(innerPlan); + node->mj_InnerTupleSlot = node->mj_MarkedTupleSlot; + node->mj_JoinState = EXEC_MJ_SKIP_TEST; + break; + } + + /* + * Here we must compare the outer tuple with the marked inner + * tuple. + */ compareResult = MJCompare(node); + Assert(compareResult != MJCMP_NOSKIP); MJ_DEBUG_COMPARE(compareResult); - if (compareResult == 0) + if (compareResult == MJCMP_MATCHES) { /* * the merge clause matched so now we restore the inner @@ -1106,7 +1282,7 @@ ExecMergeJoin(PlanState *pstate) * no more inners, no more matches are possible. * ---------------- */ - Assert(compareResult > 0); + Assert(compareResult == MJCMP_RIGHTOF); innerTupleSlot = node->mj_InnerTupleSlot; /* reload comparison data for current inner */ @@ -1143,7 +1319,7 @@ ExecMergeJoin(PlanState *pstate) break; /*---------------------------------------------------------- - * EXEC_MJ_SKIP means compare tuples and if they do not + * EXEC_MJ_SKIP_TEST means compare tuples and if they do not * match, skip whichever is lesser. * * For example: @@ -1182,19 +1358,23 @@ ExecMergeJoin(PlanState *pstate) compareResult = MJCompare(node); MJ_DEBUG_COMPARE(compareResult); - if (compareResult == 0) + if (compareResult == MJCMP_MATCHES || + compareResult == MJCMP_NOSKIP) { if (!node->mj_SkipMarkRestore) ExecMarkPos(innerPlan); MarkInnerTuple(node->mj_InnerTupleSlot, node); - node->mj_JoinState = EXEC_MJ_JOINTUPLES; + if (compareResult == MJCMP_NOSKIP) + node->mj_JoinState = EXEC_MJ_NEXTINNER; + else + node->mj_JoinState = EXEC_MJ_JOINTUPLES; } - else if (compareResult < 0) + else if (compareResult == MJCMP_LEFTOF) node->mj_JoinState = EXEC_MJ_SKIPOUTER_ADVANCE; else - /* compareResult > 0 */ + /* compareResult == MJCMP_RIGHTOF */ node->mj_JoinState = EXEC_MJ_SKIPINNER_ADVANCE; break; @@ -1598,6 +1778,7 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) node->mergeCollations, node->mergeStrategies, node->mergeNullsFirst, + &node->mergeRangeJoin, (PlanState *) mergestate); /* diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 396ee27..b750a27 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -567,6 +567,19 @@ try_mergejoin_path(PlannerInfo *root, Relids required_outer; JoinCostWorkspace workspace; + /* RIGHT/FULL joins don't support range join */ + if (jointype == JOIN_RIGHT || jointype == JOIN_FULL) + { + ListCell *lc; + + foreach(lc, mergeclauses) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + if (restrictinfo->rangejoin) + return; + } + } + if (is_partial) { try_partial_mergejoin_path(root, diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index ef58cff..c766977 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -1125,6 +1125,7 @@ select_outer_pathkeys_for_merge(PlannerInfo *root, int nClauses = list_length(mergeclauses); EquivalenceClass **ecs; int *scores; + bool *range_ecs; int necs; ListCell *lc; int j; @@ -1139,6 +1140,7 @@ select_outer_pathkeys_for_merge(PlannerInfo *root, */ ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *)); scores = (int *) palloc(nClauses * sizeof(int)); + range_ecs = palloc(nClauses * sizeof(bool)); necs = 0; foreach(lc, mergeclauses) @@ -1179,6 +1181,7 @@ select_outer_pathkeys_for_merge(PlannerInfo *root, ecs[necs] = oeclass; scores[necs] = score; + range_ecs[necs] = rinfo->rangejoin; necs++; } @@ -1196,6 +1199,11 @@ select_outer_pathkeys_for_merge(PlannerInfo *root, for (j = 0; j < necs; j++) { + /* for range join, the input order must be ascending */ + if (range_ecs[j] && + query_pathkey->pk_strategy != BTLessStrategyNumber) + continue; + if (ecs[j] == query_ec) break; /* found match */ } diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index ef25fef..31d1149 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -1101,8 +1101,9 @@ is_innerrel_unique_for(PlannerInfo *root, if (restrictinfo->is_pushed_down && IS_OUTER_JOIN(jointype)) continue; - /* Ignore if it's not a mergejoinable clause */ + /* Ignore if it's not a mergejoinable clause or is a range join */ if (!restrictinfo->can_join || + restrictinfo->rangejoin || restrictinfo->mergeopfamilies == NIL) continue; /* not mergejoinable */ diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index a436b53..0699b6b 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -14,6 +14,7 @@ */ #include "postgres.h" +#include "catalog/pg_operator.h" #include "catalog/pg_type.h" #include "catalog/pg_class.h" #include "nodes/nodeFuncs.h" @@ -1961,7 +1962,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, */ if (restrictinfo->mergeopfamilies) { - if (maybe_equivalence) + if (maybe_equivalence && !restrictinfo->rangejoin) { if (check_equivalence_delay(root, restrictinfo) && process_equivalence(root, &restrictinfo, below_outer_join)) @@ -2616,6 +2617,12 @@ check_mergejoinable(RestrictInfo *restrictinfo) opno = ((OpExpr *) clause)->opno; leftarg = linitial(((OpExpr *) clause)->args); + if (opno == OID_RANGE_OVERLAP_OP) + { + restrictinfo->rangejoin = true; + opno = OID_RANGE_EQ_OP; + } + if (op_mergejoinable(opno, exprType(leftarg)) && !contain_volatile_functions((Node *) clause)) restrictinfo->mergeopfamilies = get_mergejoin_opfamilies(opno); diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index 1075dde..8e3e057 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -186,6 +186,7 @@ make_restrictinfo_internal(Expr *clause, restrictinfo->outer_selec = -1; restrictinfo->mergeopfamilies = NIL; + restrictinfo->rangejoin = false; restrictinfo->left_ec = NULL; restrictinfo->right_ec = NULL; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index fcc8323..a4844f2 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -2980,7 +2980,6 @@ mergejoinscansel(PlannerInfo *root, Node *clause, *right; VariableStatData leftvar, rightvar; - int op_strategy; Oid op_lefttype; Oid op_righttype; Oid opno, @@ -3017,12 +3016,20 @@ mergejoinscansel(PlannerInfo *root, Node *clause, examine_variable(root, left, 0, &leftvar); examine_variable(root, right, 0, &rightvar); - /* Extract the operator's declared left/right datatypes */ - get_op_opfamily_properties(opno, opfamily, false, - &op_strategy, - &op_lefttype, - &op_righttype); - Assert(op_strategy == BTEqualStrategyNumber); + if (opno == OID_RANGE_OVERLAP_OP) + { + op_lefttype = op_righttype = ANYRANGEOID; + } + else + { + int op_strategy; + /* Extract the operator's declared left/right datatypes */ + get_op_opfamily_properties(opno, opfamily, false, + &op_strategy, + &op_lefttype, + &op_righttype); + Assert(op_strategy == BTEqualStrategyNumber); + } /* * Look up the various operators we need. If we don't find them all, it diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h index e74f963..73f8cc2 100644 --- a/src/include/catalog/pg_operator.h +++ b/src/include/catalog/pg_operator.h @@ -1748,6 +1748,7 @@ DESCR("greater than or equal"); /* generic range type operators */ DATA(insert OID = 3882 ( "=" PGNSP PGUID b t t 3831 3831 16 3882 3883 range_eq eqsel eqjoinsel )); DESCR("equal"); +#define OID_RANGE_EQ_OP 3882 DATA(insert OID = 3883 ( "<>" PGNSP PGUID b f f 3831 3831 16 3883 3882 range_ne neqsel neqjoinsel )); DESCR("not equal"); DATA(insert OID = 3884 ( "<" PGNSP PGUID b f f 3831 3831 16 3887 3886 range_lt rangesel scalarltjoinsel )); diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 74e9fb5..8748e07 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -715,6 +715,7 @@ typedef struct MergeJoin Oid *mergeCollations; /* per-clause OIDs of collations */ int *mergeStrategies; /* per-clause ordering (ASC or DESC) */ bool *mergeNullsFirst; /* per-clause nulls ordering */ + bool mergeRangeJoin; /* is this a range merge join? */ } MergeJoin; /* ---------------- diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 71689b8..0f3c88f 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1886,6 +1886,9 @@ typedef struct RestrictInfo /* valid if clause is mergejoinable, else NIL */ List *mergeopfamilies; /* opfamilies containing clause operator */ + /* is a rangejoin clause? */ + bool rangejoin; + /* cache space for mergeclause processing; NULL if not yet set */ EquivalenceClass *left_ec; /* EquivalenceClass containing lefthand */ EquivalenceClass *right_ec; /* EquivalenceClass containing righthand */ diff --git a/src/test/regress/expected/rangejoin.out b/src/test/regress/expected/rangejoin.out new file mode 100644 index 0000000..bda7c23 --- /dev/null +++ b/src/test/regress/expected/rangejoin.out @@ -0,0 +1,518 @@ +-- test with unique to exercise more of the planner +create table rangejoin_left(i1 int, ir1 int4range unique); +create table rangejoin_right(i2 int, ir2 int4range unique); +insert into rangejoin_left values + (1001, int4range(10, 80)), + (1002, int4range(20, 30)), + (1003, int4range(21, 25)), + (1004, int4range(22, 35)), + (1005, int4range(40, 60)), + (1006, int4range(50, 60)); +insert into rangejoin_right values + (1000, 'empty'::int4range), + (1001, int4range(NULL, 4)), + (1002, int4range(10, 12)), + (1003, int4range(11, 14)), + (1004, int4range(20, 45)), + (1005, int4range(24, 28)), + (1006, int4range(85, 90)); +-- simple inner join +explain (costs off) select i1, ir1, i2, ir2 + from rangejoin_left, rangejoin_right + where ir1 && ir2; + QUERY PLAN +------------------------------------------------------------------------- + Range Merge Join + Merge Cond: (rangejoin_left.ir1 && rangejoin_right.ir2) + -> Index Scan using rangejoin_left_ir1_key on rangejoin_left + -> Materialize + -> Index Scan using rangejoin_right_ir2_key on rangejoin_right +(5 rows) + +select i1, ir1, i2, ir2 + from rangejoin_left, rangejoin_right + where ir1 && ir2; + i1 | ir1 | i2 | ir2 +------+---------+------+--------- + 1001 | [10,80) | 1002 | [10,12) + 1001 | [10,80) | 1003 | [11,14) + 1001 | [10,80) | 1004 | [20,45) + 1001 | [10,80) | 1005 | [24,28) + 1002 | [20,30) | 1004 | [20,45) + 1002 | [20,30) | 1005 | [24,28) + 1003 | [21,25) | 1004 | [20,45) + 1003 | [21,25) | 1005 | [24,28) + 1004 | [22,35) | 1004 | [20,45) + 1004 | [22,35) | 1005 | [24,28) + 1005 | [40,60) | 1004 | [20,45) +(11 rows) + +-- two predicates +explain (costs off) select i1, ir1, i2, ir2 + from rangejoin_left inner join rangejoin_right + on (i1 = i2 and ir1 && ir2); + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Range Merge Join + Merge Cond: ((rangejoin_left.ir1 && rangejoin_right.ir2) AND (rangejoin_left.i1 = rangejoin_right.i2)) + -> Sort + Sort Key: rangejoin_left.ir1, rangejoin_left.i1 + -> Seq Scan on rangejoin_left + -> Sort + Sort Key: rangejoin_right.ir2, rangejoin_right.i2 + -> Seq Scan on rangejoin_right +(8 rows) + +select i1, ir1, i2, ir2 + from rangejoin_left inner join rangejoin_right + on (i1 = i2 and ir1 && ir2); + i1 | ir1 | i2 | ir2 +------+---------+------+--------- + 1004 | [22,35) | 1004 | [20,45) +(1 row) + +-- left join +explain (costs off) select i1, ir1, i2, ir2 + from rangejoin_left left join rangejoin_right + on (ir1 && ir2); + QUERY PLAN +------------------------------------------------------------------------- + Range Merge Left Join + Merge Cond: (rangejoin_left.ir1 && rangejoin_right.ir2) + -> Index Scan using rangejoin_left_ir1_key on rangejoin_left + -> Materialize + -> Index Scan using rangejoin_right_ir2_key on rangejoin_right +(5 rows) + +select i1, ir1, i2, ir2 + from rangejoin_left left join rangejoin_right + on (ir1 && ir2); + i1 | ir1 | i2 | ir2 +------+---------+------+--------- + 1001 | [10,80) | 1002 | [10,12) + 1001 | [10,80) | 1003 | [11,14) + 1001 | [10,80) | 1004 | [20,45) + 1001 | [10,80) | 1005 | [24,28) + 1002 | [20,30) | 1004 | [20,45) + 1002 | [20,30) | 1005 | [24,28) + 1003 | [21,25) | 1004 | [20,45) + 1003 | [21,25) | 1005 | [24,28) + 1004 | [22,35) | 1004 | [20,45) + 1004 | [22,35) | 1005 | [24,28) + 1005 | [40,60) | 1004 | [20,45) + 1006 | [50,60) | | +(12 rows) + +-- right join should be implemented as left join +explain (costs off) select i1, ir1, i2, ir2 + from rangejoin_left right join rangejoin_right + on (ir1 && ir2); + QUERY PLAN +----------------------------------------------------------------------- + Range Merge Left Join + Merge Cond: (rangejoin_right.ir2 && rangejoin_left.ir1) + -> Index Scan using rangejoin_right_ir2_key on rangejoin_right + -> Materialize + -> Index Scan using rangejoin_left_ir1_key on rangejoin_left +(5 rows) + +-- full join doesn't support range join +explain (costs off) select i1, ir1, i2, ir2 + from rangejoin_left full join rangejoin_right + on (ir1 && ir2); +ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions +-- range input to range join must be ascending +explain (costs off) select i1, ir1, i2, ir2 + from rangejoin_left inner join rangejoin_right + on (i1 = i2 and ir1 && ir2) + order by ir1 desc, i1; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: rangejoin_left.ir1 DESC, rangejoin_left.i1 + -> Range Merge Join + Merge Cond: ((rangejoin_left.ir1 && rangejoin_right.ir2) AND (rangejoin_left.i1 = rangejoin_right.i2)) + -> Sort + Sort Key: rangejoin_left.ir1, rangejoin_left.i1 + -> Seq Scan on rangejoin_left + -> Sort + Sort Key: rangejoin_right.ir2, rangejoin_right.i2 + -> Seq Scan on rangejoin_right +(10 rows) + +-- but it's OK for non-range inputs to be descending +explain (costs off) select i1, ir1, i2, ir2 + from rangejoin_left inner join rangejoin_right + on (i1 = i2 and ir1 && ir2) + order by ir1 nulls first, i1 desc; + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Range Merge Join + Merge Cond: ((rangejoin_left.ir1 && rangejoin_right.ir2) AND (rangejoin_left.i1 = rangejoin_right.i2)) + -> Sort + Sort Key: rangejoin_left.ir1 NULLS FIRST, rangejoin_left.i1 DESC + -> Seq Scan on rangejoin_left + -> Sort + Sort Key: rangejoin_right.ir2 NULLS FIRST, rangejoin_right.i2 DESC + -> Seq Scan on rangejoin_right +(8 rows) + +select i1, ir1, i2, ir2 + from rangejoin_left inner join rangejoin_right + on (i1 = i2 and ir1 && ir2) + order by ir1 nulls first, i1 desc; + i1 | ir1 | i2 | ir2 +------+---------+------+--------- + 1004 | [22,35) | 1004 | [20,45) +(1 row) + +drop table rangejoin_left; +drop table rangejoin_right; +create table multirangejoin_left (ir1 int4range, ir2 int4range); +create table multirangejoin_right (ir3 int4range, ir4 int4range); +insert into multirangejoin_left values + (int4range(30,99), int4range(20,30)), + (int4range(2,40), int4range(15,27)), + (int4range(61,99), int4range(20,45)), + (int4range(22,32), int4range(21,66)), + (int4range(36,71), int4range(45,49)), + (int4range(9,80), int4range(2,4)); +insert into multirangejoin_right values + (int4range(9,70), int4range(10,78)), + (int4range(21,37), int4range(89,99)), + (int4range(5,98), int4range(35,97)), + (int4range(12,17), int4range(81,92)), + (int4range(15,19), int4range(5,55)), + (int4range(57,58), int4range(42,80)); +explain (costs off) select * + from multirangejoin_left inner join multirangejoin_right + on (ir1 && ir3 and ir2 && ir4) order by ir1, ir2, ir3, ir4; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: multirangejoin_left.ir1, multirangejoin_left.ir2, multirangejoin_right.ir3, multirangejoin_right.ir4 + -> Range Merge Join + Merge Cond: ((multirangejoin_left.ir1 && multirangejoin_right.ir3) AND (multirangejoin_left.ir2 && multirangejoin_right.ir4)) + -> Sort + Sort Key: multirangejoin_left.ir1, multirangejoin_left.ir2 + -> Seq Scan on multirangejoin_left + -> Sort + Sort Key: multirangejoin_right.ir3, multirangejoin_right.ir4 + -> Seq Scan on multirangejoin_right +(10 rows) + +select * + from multirangejoin_left inner join multirangejoin_right + on (ir1 && ir3 and ir2 && ir4) order by ir1, ir2, ir3, ir4; + ir1 | ir2 | ir3 | ir4 +---------+---------+---------+--------- + [2,40) | [15,27) | [9,70) | [10,78) + [2,40) | [15,27) | [15,19) | [5,55) + [22,32) | [21,66) | [5,98) | [35,97) + [22,32) | [21,66) | [9,70) | [10,78) + [30,99) | [20,30) | [9,70) | [10,78) + [36,71) | [45,49) | [5,98) | [35,97) + [36,71) | [45,49) | [9,70) | [10,78) + [36,71) | [45,49) | [57,58) | [42,80) + [61,99) | [20,45) | [5,98) | [35,97) + [61,99) | [20,45) | [9,70) | [10,78) +(10 rows) + +set enable_mergejoin=false; +explain (costs off) select * + from multirangejoin_left inner join multirangejoin_right + on (ir1 && ir3 and ir2 && ir4) order by ir1, ir2, ir3, ir4; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: multirangejoin_left.ir1, multirangejoin_left.ir2, multirangejoin_right.ir3, multirangejoin_right.ir4 + -> Nested Loop + Join Filter: ((multirangejoin_left.ir1 && multirangejoin_right.ir3) AND (multirangejoin_left.ir2 && multirangejoin_right.ir4)) + -> Seq Scan on multirangejoin_left + -> Materialize + -> Seq Scan on multirangejoin_right +(7 rows) + +select * + from multirangejoin_left inner join multirangejoin_right + on (ir1 && ir3 and ir2 && ir4) order by ir1, ir2, ir3, ir4; + ir1 | ir2 | ir3 | ir4 +---------+---------+---------+--------- + [2,40) | [15,27) | [9,70) | [10,78) + [2,40) | [15,27) | [15,19) | [5,55) + [22,32) | [21,66) | [5,98) | [35,97) + [22,32) | [21,66) | [9,70) | [10,78) + [30,99) | [20,30) | [9,70) | [10,78) + [36,71) | [45,49) | [5,98) | [35,97) + [36,71) | [45,49) | [9,70) | [10,78) + [36,71) | [45,49) | [57,58) | [42,80) + [61,99) | [20,45) | [5,98) | [35,97) + [61,99) | [20,45) | [9,70) | [10,78) +(10 rows) + +set enable_mergejoin=true; +explain (costs off) select * + from multirangejoin_left left join multirangejoin_right + on (ir1 && ir4 and ir2 && ir3) order by ir4, ir3, ir2, ir1; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: multirangejoin_right.ir4, multirangejoin_right.ir3, multirangejoin_left.ir2, multirangejoin_left.ir1 + -> Range Merge Left Join + Merge Cond: ((multirangejoin_left.ir1 && multirangejoin_right.ir4) AND (multirangejoin_left.ir2 && multirangejoin_right.ir3)) + -> Sort + Sort Key: multirangejoin_left.ir1, multirangejoin_left.ir2 + -> Seq Scan on multirangejoin_left + -> Sort + Sort Key: multirangejoin_right.ir4, multirangejoin_right.ir3 + -> Seq Scan on multirangejoin_right +(10 rows) + +select * + from multirangejoin_left left join multirangejoin_right + on (ir1 && ir4 and ir2 && ir3) order by ir4, ir3, ir2, ir1; + ir1 | ir2 | ir3 | ir4 +---------+---------+---------+--------- + [2,40) | [15,27) | [15,19) | [5,55) + [2,40) | [15,27) | [9,70) | [10,78) + [30,99) | [20,30) | [9,70) | [10,78) + [61,99) | [20,45) | [9,70) | [10,78) + [22,32) | [21,66) | [9,70) | [10,78) + [36,71) | [45,49) | [9,70) | [10,78) + [2,40) | [15,27) | [5,98) | [35,97) + [30,99) | [20,30) | [5,98) | [35,97) + [61,99) | [20,45) | [5,98) | [35,97) + [36,71) | [45,49) | [5,98) | [35,97) + [30,99) | [20,30) | [21,37) | [89,99) + [61,99) | [20,45) | [21,37) | [89,99) + [9,80) | [2,4) | | +(13 rows) + +set enable_mergejoin=false; +explain (costs off) select * + from multirangejoin_left left join multirangejoin_right + on (ir1 && ir4 and ir2 && ir3) order by ir4, ir3, ir2, ir1; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: multirangejoin_right.ir4, multirangejoin_right.ir3, multirangejoin_left.ir2, multirangejoin_left.ir1 + -> Nested Loop Left Join + Join Filter: ((multirangejoin_left.ir1 && multirangejoin_right.ir4) AND (multirangejoin_left.ir2 && multirangejoin_right.ir3)) + -> Seq Scan on multirangejoin_left + -> Materialize + -> Seq Scan on multirangejoin_right +(7 rows) + +select * + from multirangejoin_left left join multirangejoin_right + on (ir1 && ir4 and ir2 && ir3) order by ir4, ir3, ir2, ir1; + ir1 | ir2 | ir3 | ir4 +---------+---------+---------+--------- + [2,40) | [15,27) | [15,19) | [5,55) + [2,40) | [15,27) | [9,70) | [10,78) + [30,99) | [20,30) | [9,70) | [10,78) + [61,99) | [20,45) | [9,70) | [10,78) + [22,32) | [21,66) | [9,70) | [10,78) + [36,71) | [45,49) | [9,70) | [10,78) + [2,40) | [15,27) | [5,98) | [35,97) + [30,99) | [20,30) | [5,98) | [35,97) + [61,99) | [20,45) | [5,98) | [35,97) + [36,71) | [45,49) | [5,98) | [35,97) + [30,99) | [20,30) | [21,37) | [89,99) + [61,99) | [20,45) | [21,37) | [89,99) + [9,80) | [2,4) | | +(13 rows) + +set enable_mergejoin=true; +drop table multirangejoin_left; +drop table multirangejoin_right; +create table bigrangejoin_left (i1 int, ir1 int4range); +create table bigrangejoin_right (i2 int, ir2 int4range); +-- 100 small ranges +insert into bigrangejoin_left + select g/4, + int4range(g, + g + case when (g%2=0) then g%7 else 12-(g%11) end) + from generate_series(1,100) g; +insert into bigrangejoin_right + select g/4, + int4range(g-7+(g%19), + g-7+(g%19) + case when (g%3=0) then g%11 else 17-(g%15) end) + from generate_series(1,100) g; +-- 10 medium ranges +insert into bigrangejoin_left + select g/4*10, + int4range(g*10, + g*10 + case when (g%2=0) then g%7 else 12-(g%11) end) + from generate_series(1,10) g; +insert into bigrangejoin_right + select g/4*10, + int4range(g*10-57+(g%173), + g*10-57+(g%173) + case when (g%3=0) then g%123 else 97-(g%83) end) + from generate_series(1,10) g; +insert into bigrangejoin_left select g*11-21, 'empty'::int4range + from generate_series(1,9) g; +insert into bigrangejoin_right select g*13-29, 'empty'::int4range + from generate_series(1,8) g; +insert into bigrangejoin_left values + (4, int4range(NULL,5)), + (93, int4range(95, NULL)); +insert into bigrangejoin_right values + (7, int4range(NULL,3)), + (92, int4range(99, NULL)); +create temp table rangejoin_result1 + (i1 int, ir1 int4range, i2 int, ir2 int4range); +create temp table rangejoin_result2 + (i1 int, ir1 int4range, i2 int, ir2 int4range); +set enable_hashjoin=false; +explain (costs off) insert into rangejoin_result1 + select i1, ir1, i2, ir2 + from bigrangejoin_left left join bigrangejoin_right + on (i1 = i2 and ir1 && ir2) + order by ir1 nulls first, i1 desc; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------- + Insert on rangejoin_result1 + -> Range Merge Left Join + Merge Cond: ((bigrangejoin_left.ir1 && bigrangejoin_right.ir2) AND (bigrangejoin_left.i1 = bigrangejoin_right.i2)) + -> Sort + Sort Key: bigrangejoin_left.ir1 NULLS FIRST, bigrangejoin_left.i1 DESC + -> Seq Scan on bigrangejoin_left + -> Sort + Sort Key: bigrangejoin_right.ir2 NULLS FIRST, bigrangejoin_right.i2 DESC + -> Seq Scan on bigrangejoin_right +(9 rows) + +insert into rangejoin_result1 + select i1, ir1, i2, ir2 + from bigrangejoin_left left join bigrangejoin_right + on (i1 = i2 and ir1 && ir2) + order by ir1 nulls first, i1 desc; +set enable_hashjoin=true; +set enable_mergejoin=false; +explain (costs off) insert into rangejoin_result2 + select i1, ir1, i2, ir2 + from bigrangejoin_left left join bigrangejoin_right + on (i1 = i2 and ir1 && ir2) + order by ir1 nulls first, i1 desc; + QUERY PLAN +-------------------------------------------------------------------------------- + Insert on rangejoin_result2 + -> Sort + Sort Key: bigrangejoin_left.ir1 NULLS FIRST, bigrangejoin_left.i1 DESC + -> Hash Left Join + Hash Cond: (bigrangejoin_left.i1 = bigrangejoin_right.i2) + Join Filter: (bigrangejoin_left.ir1 && bigrangejoin_right.ir2) + -> Seq Scan on bigrangejoin_left + -> Hash + -> Seq Scan on bigrangejoin_right +(9 rows) + +insert into rangejoin_result2 + select i1, ir1, i2, ir2 + from bigrangejoin_left left join bigrangejoin_right + on (i1 = i2 and ir1 && ir2) + order by ir1 nulls first, i1 desc; +set enable_mergejoin=true; +select count(*) from rangejoin_result1; + count +------- + 292 +(1 row) + +select count(*) from rangejoin_result2; + count +------- + 292 +(1 row) + +select * from rangejoin_result1 except select * from rangejoin_result2; + i1 | ir1 | i2 | ir2 +----+-----+----+----- +(0 rows) + +select * from rangejoin_result2 except select * from rangejoin_result1; + i1 | ir1 | i2 | ir2 +----+-----+----+----- +(0 rows) + +drop table rangejoin_result1; +drop table rangejoin_result2; +create temp table rangejoin_result3 + (i1 int, ir1 int4range, i2 int, ir2 int4range); +create temp table rangejoin_result4 + (i1 int, ir1 int4range, i2 int, ir2 int4range); +explain (costs off) insert into rangejoin_result3 + select i1, ir1, i2, ir2 + from bigrangejoin_left inner join bigrangejoin_right + on (i1 = i2 and ir1 && ir2) + order by i1, ir1; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------- + Insert on rangejoin_result3 + -> Range Merge Join + Merge Cond: ((bigrangejoin_left.i1 = bigrangejoin_right.i2) AND (bigrangejoin_left.ir1 && bigrangejoin_right.ir2)) + -> Sort + Sort Key: bigrangejoin_left.i1, bigrangejoin_left.ir1 + -> Seq Scan on bigrangejoin_left + -> Sort + Sort Key: bigrangejoin_right.i2, bigrangejoin_right.ir2 + -> Seq Scan on bigrangejoin_right +(9 rows) + +insert into rangejoin_result3 + select i1, ir1, i2, ir2 + from bigrangejoin_left inner join bigrangejoin_right + on (i1 = i2 and ir1 && ir2) + order by i1, ir1; +set enable_mergejoin=false; +explain (costs off) insert into rangejoin_result4 + select i1, ir1, i2, ir2 + from bigrangejoin_left inner join bigrangejoin_right + on (i1 = i2 and ir1 && ir2) + order by i1, ir1; + QUERY PLAN +------------------------------------------------------------------------------ + Insert on rangejoin_result4 + -> Sort + Sort Key: bigrangejoin_left.i1, bigrangejoin_left.ir1 + -> Hash Join + Hash Cond: (bigrangejoin_left.i1 = bigrangejoin_right.i2) + Join Filter: (bigrangejoin_left.ir1 && bigrangejoin_right.ir2) + -> Seq Scan on bigrangejoin_left + -> Hash + -> Seq Scan on bigrangejoin_right +(9 rows) + +insert into rangejoin_result4 + select i1, ir1, i2, ir2 + from bigrangejoin_left inner join bigrangejoin_right + on (i1 = i2 and ir1 && ir2) + order by i1, ir1; +set enable_mergejoin=true; +select count(*) from rangejoin_result3; + count +------- + 259 +(1 row) + +select count(*) from rangejoin_result4; + count +------- + 259 +(1 row) + +select * from rangejoin_result3 except select * from rangejoin_result4; + i1 | ir1 | i2 | ir2 +----+-----+----+----- +(0 rows) + +select * from rangejoin_result4 except select * from rangejoin_result3; + i1 | ir1 | i2 | ir2 +----+-----+----+----- +(0 rows) + +drop table rangejoin_result3; +drop table rangejoin_result4; +drop table bigrangejoin_left; +drop table bigrangejoin_right; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index e224977..88bc5bd 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -111,7 +111,7 @@ test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combo # NB: temp.sql does a reconnect which transiently uses 2 connections, # so keep this parallel group to at most 19 tests # ---------- -test: plancache limit plpgsql copy2 temp domain rangefuncs prepare without_oid conversion truncate alter_table sequence polymorphism rowtypes returning largeobject with xml +test: plancache limit plpgsql copy2 temp domain rangefuncs rangejoin prepare without_oid conversion truncate alter_table sequence polymorphism rowtypes returning largeobject with xml # ---------- # Another group of parallel tests diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 9fc5f1a..5dd542a 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -167,6 +167,7 @@ test: copy2 test: temp test: domain test: rangefuncs +test: rangejoin test: prepare test: without_oid test: conversion diff --git a/src/test/regress/sql/rangejoin.sql b/src/test/regress/sql/rangejoin.sql new file mode 100644 index 0000000..ad859b5 --- /dev/null +++ b/src/test/regress/sql/rangejoin.sql @@ -0,0 +1,266 @@ + +-- test with unique to exercise more of the planner +create table rangejoin_left(i1 int, ir1 int4range unique); +create table rangejoin_right(i2 int, ir2 int4range unique); + +insert into rangejoin_left values + (1001, int4range(10, 80)), + (1002, int4range(20, 30)), + (1003, int4range(21, 25)), + (1004, int4range(22, 35)), + (1005, int4range(40, 60)), + (1006, int4range(50, 60)); + +insert into rangejoin_right values + (1000, 'empty'::int4range), + (1001, int4range(NULL, 4)), + (1002, int4range(10, 12)), + (1003, int4range(11, 14)), + (1004, int4range(20, 45)), + (1005, int4range(24, 28)), + (1006, int4range(85, 90)); + +-- simple inner join +explain (costs off) select i1, ir1, i2, ir2 + from rangejoin_left, rangejoin_right + where ir1 && ir2; + +select i1, ir1, i2, ir2 + from rangejoin_left, rangejoin_right + where ir1 && ir2; + +-- two predicates +explain (costs off) select i1, ir1, i2, ir2 + from rangejoin_left inner join rangejoin_right + on (i1 = i2 and ir1 && ir2); + +select i1, ir1, i2, ir2 + from rangejoin_left inner join rangejoin_right + on (i1 = i2 and ir1 && ir2); + +-- left join +explain (costs off) select i1, ir1, i2, ir2 + from rangejoin_left left join rangejoin_right + on (ir1 && ir2); + +select i1, ir1, i2, ir2 + from rangejoin_left left join rangejoin_right + on (ir1 && ir2); + +-- right join should be implemented as left join +explain (costs off) select i1, ir1, i2, ir2 + from rangejoin_left right join rangejoin_right + on (ir1 && ir2); + +-- full join doesn't support range join +explain (costs off) select i1, ir1, i2, ir2 + from rangejoin_left full join rangejoin_right + on (ir1 && ir2); + +-- range input to range join must be ascending +explain (costs off) select i1, ir1, i2, ir2 + from rangejoin_left inner join rangejoin_right + on (i1 = i2 and ir1 && ir2) + order by ir1 desc, i1; + +-- but it's OK for non-range inputs to be descending +explain (costs off) select i1, ir1, i2, ir2 + from rangejoin_left inner join rangejoin_right + on (i1 = i2 and ir1 && ir2) + order by ir1 nulls first, i1 desc; + +select i1, ir1, i2, ir2 + from rangejoin_left inner join rangejoin_right + on (i1 = i2 and ir1 && ir2) + order by ir1 nulls first, i1 desc; + +drop table rangejoin_left; +drop table rangejoin_right; + +create table multirangejoin_left (ir1 int4range, ir2 int4range); +create table multirangejoin_right (ir3 int4range, ir4 int4range); + +insert into multirangejoin_left values + (int4range(30,99), int4range(20,30)), + (int4range(2,40), int4range(15,27)), + (int4range(61,99), int4range(20,45)), + (int4range(22,32), int4range(21,66)), + (int4range(36,71), int4range(45,49)), + (int4range(9,80), int4range(2,4)); + + +insert into multirangejoin_right values + (int4range(9,70), int4range(10,78)), + (int4range(21,37), int4range(89,99)), + (int4range(5,98), int4range(35,97)), + (int4range(12,17), int4range(81,92)), + (int4range(15,19), int4range(5,55)), + (int4range(57,58), int4range(42,80)); + +explain (costs off) select * + from multirangejoin_left inner join multirangejoin_right + on (ir1 && ir3 and ir2 && ir4) order by ir1, ir2, ir3, ir4; + +select * + from multirangejoin_left inner join multirangejoin_right + on (ir1 && ir3 and ir2 && ir4) order by ir1, ir2, ir3, ir4; + +set enable_mergejoin=false; +explain (costs off) select * + from multirangejoin_left inner join multirangejoin_right + on (ir1 && ir3 and ir2 && ir4) order by ir1, ir2, ir3, ir4; + +select * + from multirangejoin_left inner join multirangejoin_right + on (ir1 && ir3 and ir2 && ir4) order by ir1, ir2, ir3, ir4; +set enable_mergejoin=true; + +explain (costs off) select * + from multirangejoin_left left join multirangejoin_right + on (ir1 && ir4 and ir2 && ir3) order by ir4, ir3, ir2, ir1; + +select * + from multirangejoin_left left join multirangejoin_right + on (ir1 && ir4 and ir2 && ir3) order by ir4, ir3, ir2, ir1; + +set enable_mergejoin=false; +explain (costs off) select * + from multirangejoin_left left join multirangejoin_right + on (ir1 && ir4 and ir2 && ir3) order by ir4, ir3, ir2, ir1; + +select * + from multirangejoin_left left join multirangejoin_right + on (ir1 && ir4 and ir2 && ir3) order by ir4, ir3, ir2, ir1; +set enable_mergejoin=true; + +drop table multirangejoin_left; +drop table multirangejoin_right; + +create table bigrangejoin_left (i1 int, ir1 int4range); +create table bigrangejoin_right (i2 int, ir2 int4range); + +-- 100 small ranges +insert into bigrangejoin_left + select g/4, + int4range(g, + g + case when (g%2=0) then g%7 else 12-(g%11) end) + from generate_series(1,100) g; +insert into bigrangejoin_right + select g/4, + int4range(g-7+(g%19), + g-7+(g%19) + case when (g%3=0) then g%11 else 17-(g%15) end) + from generate_series(1,100) g; + +-- 10 medium ranges +insert into bigrangejoin_left + select g/4*10, + int4range(g*10, + g*10 + case when (g%2=0) then g%7 else 12-(g%11) end) + from generate_series(1,10) g; +insert into bigrangejoin_right + select g/4*10, + int4range(g*10-57+(g%173), + g*10-57+(g%173) + case when (g%3=0) then g%123 else 97-(g%83) end) + from generate_series(1,10) g; + +insert into bigrangejoin_left select g*11-21, 'empty'::int4range + from generate_series(1,9) g; + +insert into bigrangejoin_right select g*13-29, 'empty'::int4range + from generate_series(1,8) g; + +insert into bigrangejoin_left values + (4, int4range(NULL,5)), + (93, int4range(95, NULL)); + +insert into bigrangejoin_right values + (7, int4range(NULL,3)), + (92, int4range(99, NULL)); + +create temp table rangejoin_result1 + (i1 int, ir1 int4range, i2 int, ir2 int4range); +create temp table rangejoin_result2 + (i1 int, ir1 int4range, i2 int, ir2 int4range); + +set enable_hashjoin=false; +explain (costs off) insert into rangejoin_result1 + select i1, ir1, i2, ir2 + from bigrangejoin_left left join bigrangejoin_right + on (i1 = i2 and ir1 && ir2) + order by ir1 nulls first, i1 desc; + +insert into rangejoin_result1 + select i1, ir1, i2, ir2 + from bigrangejoin_left left join bigrangejoin_right + on (i1 = i2 and ir1 && ir2) + order by ir1 nulls first, i1 desc; +set enable_hashjoin=true; + +set enable_mergejoin=false; +explain (costs off) insert into rangejoin_result2 + select i1, ir1, i2, ir2 + from bigrangejoin_left left join bigrangejoin_right + on (i1 = i2 and ir1 && ir2) + order by ir1 nulls first, i1 desc; + +insert into rangejoin_result2 + select i1, ir1, i2, ir2 + from bigrangejoin_left left join bigrangejoin_right + on (i1 = i2 and ir1 && ir2) + order by ir1 nulls first, i1 desc; +set enable_mergejoin=true; + +select count(*) from rangejoin_result1; +select count(*) from rangejoin_result2; + +select * from rangejoin_result1 except select * from rangejoin_result2; + +select * from rangejoin_result2 except select * from rangejoin_result1; + +drop table rangejoin_result1; +drop table rangejoin_result2; + +create temp table rangejoin_result3 + (i1 int, ir1 int4range, i2 int, ir2 int4range); +create temp table rangejoin_result4 + (i1 int, ir1 int4range, i2 int, ir2 int4range); + + +explain (costs off) insert into rangejoin_result3 + select i1, ir1, i2, ir2 + from bigrangejoin_left inner join bigrangejoin_right + on (i1 = i2 and ir1 && ir2) + order by i1, ir1; + +insert into rangejoin_result3 + select i1, ir1, i2, ir2 + from bigrangejoin_left inner join bigrangejoin_right + on (i1 = i2 and ir1 && ir2) + order by i1, ir1; + +set enable_mergejoin=false; +explain (costs off) insert into rangejoin_result4 + select i1, ir1, i2, ir2 + from bigrangejoin_left inner join bigrangejoin_right + on (i1 = i2 and ir1 && ir2) + order by i1, ir1; + +insert into rangejoin_result4 + select i1, ir1, i2, ir2 + from bigrangejoin_left inner join bigrangejoin_right + on (i1 = i2 and ir1 && ir2) + order by i1, ir1; +set enable_mergejoin=true; + +select count(*) from rangejoin_result3; +select count(*) from rangejoin_result4; + +select * from rangejoin_result3 except select * from rangejoin_result4; + +select * from rangejoin_result4 except select * from rangejoin_result3; + +drop table rangejoin_result3; +drop table rangejoin_result4; + +drop table bigrangejoin_left; +drop table bigrangejoin_right;