diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 6d4b9eb3b9..42c6df549f 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -108,8 +108,7 @@ static void show_sort_info(SortState *sortstate, ExplainState *es); static void show_incremental_sort_info(IncrementalSortState *incrsortstate, ExplainState *es); static void show_hash_info(HashState *hashstate, ExplainState *es); -static void show_resultcache_info(ResultCacheState *rcstate, List *ancestors, - ExplainState *es); +static void show_resultcache_info(NestLoopState *nlstate, List *ancestors, ExplainState *es); static void show_hashagg_info(AggState *hashstate, ExplainState *es); static void show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es); @@ -1494,10 +1493,13 @@ ExplainNode(PlanState *planstate, List *ancestors, * For historical reasons, the join type is interpolated * into the node type name... */ - if (((Join *) plan)->jointype != JOIN_INNER) + if (((Join *)plan)->jointype != JOIN_INNER) appendStringInfo(es->str, " %s Join", jointype); else if (!IsA(plan, NestLoop)) appendStringInfoString(es->str, " Join"); + else if (castNode(NestLoop, plan)->paramcache) + appendStringInfoString(es->str, " Cached"); + } else ExplainPropertyText("Join Type", jointype, es); @@ -1883,6 +1885,7 @@ ExplainNode(PlanState *planstate, List *ancestors, } break; case T_NestLoop: + show_resultcache_info((NestLoopState *) planstate, ancestors, es); show_upper_qual(((NestLoop *) plan)->join.joinqual, "Join Filter", planstate, ancestors, es); if (((NestLoop *) plan)->join.joinqual) @@ -1963,10 +1966,10 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_Hash: show_hash_info(castNode(HashState, planstate), es); break; - case T_ResultCache: - show_resultcache_info(castNode(ResultCacheState, planstate), - ancestors, es); - break; + //case T_ResultCache: + // show_resultcache_info(castNode(ResultCacheState, planstate), + // ancestors, es); + // break; default: break; } @@ -3041,15 +3044,19 @@ show_hash_info(HashState *hashstate, ExplainState *es) } static void -show_resultcache_info(ResultCacheState *rcstate, List *ancestors, ExplainState *es) +show_resultcache_info(NestLoopState *nlstate, List *ancestors, ExplainState *es) { - Plan *plan = ((PlanState *) rcstate)->plan; + Plan *plan = ((PlanState *) nlstate)->plan; + ResultCacheState *rcstate; ListCell *lc; List *context; StringInfoData keystr; char *seperator = ""; bool useprefix; + if (nlstate->nl_pcache == NULL) + return; + initStringInfo(&keystr); /* XXX surely we'll always have more than one if we have a resultcache? 
*/ @@ -3060,7 +3067,7 @@ show_resultcache_info(ResultCacheState *rcstate, List *ancestors, ExplainState * plan, ancestors); - foreach(lc, ((ResultCache *) plan)->param_exprs) + foreach(lc, ((NestLoop *) plan)->param_exprs) { Node *expr = (Node *) lfirst(lc); @@ -3086,6 +3093,8 @@ show_resultcache_info(ResultCacheState *rcstate, List *ancestors, ExplainState * if (!es->analyze) return; + + rcstate = nlstate->nl_pcache; if (es->format != EXPLAIN_FORMAT_TEXT) { ExplainPropertyInteger("Cache Hits", NULL, rcstate->stats.cache_hits, es); diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 68920ecd89..f9c2f80c79 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -44,7 +44,7 @@ #include "executor/nodeProjectSet.h" #include "executor/nodeRecursiveunion.h" #include "executor/nodeResult.h" -#include "executor/nodeResultCache.h" +//#include "executor/nodeResultCache.h" #include "executor/nodeSamplescan.h" #include "executor/nodeSeqscan.h" #include "executor/nodeSetOp.h" @@ -250,9 +250,9 @@ ExecReScan(PlanState *node) ExecReScanMaterial((MaterialState *) node); break; - case T_ResultCacheState: - ExecReScanResultCache((ResultCacheState *) node); - break; + //case T_ResultCacheState: + // ExecReScanResultCache((ResultCacheState *) node); + // break; case T_SortState: ExecReScanSort((SortState *) node); diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index 459e9dd3e9..37cfa36881 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -35,7 +35,7 @@ #include "executor/nodeIncrementalSort.h" #include "executor/nodeIndexonlyscan.h" #include "executor/nodeIndexscan.h" -#include "executor/nodeResultCache.h" +//#include "executor/nodeResultCache.h" #include "executor/nodeSeqscan.h" #include "executor/nodeSort.h" #include "executor/nodeSubplan.h" @@ -294,10 +294,10 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e) /* even when not parallel-aware, for EXPLAIN ANALYZE */ ExecAggEstimate((AggState *) planstate, e->pcxt); break; - case T_ResultCacheState: - /* even when not parallel-aware, for EXPLAIN ANALYZE */ - ExecResultCacheEstimate((ResultCacheState *) planstate, e->pcxt); - break; + //case T_ResultCacheState: + // /* even when not parallel-aware, for EXPLAIN ANALYZE */ + // ExecResultCacheEstimate((ResultCacheState *) planstate, e->pcxt); + // break; default: break; } @@ -518,10 +518,10 @@ ExecParallelInitializeDSM(PlanState *planstate, /* even when not parallel-aware, for EXPLAIN ANALYZE */ ExecAggInitializeDSM((AggState *) planstate, d->pcxt); break; - case T_ResultCacheState: - /* even when not parallel-aware, for EXPLAIN ANALYZE */ - ExecResultCacheInitializeDSM((ResultCacheState *) planstate, d->pcxt); - break; + //case T_ResultCacheState: + // /* even when not parallel-aware, for EXPLAIN ANALYZE */ + // ExecResultCacheInitializeDSM((ResultCacheState *) planstate, d->pcxt); + // break; default: break; } @@ -998,9 +998,9 @@ ExecParallelReInitializeDSM(PlanState *planstate, case T_HashState: case T_SortState: case T_IncrementalSortState: - case T_ResultCacheState: - /* these nodes have DSM state, but no reinitialization is required */ - break; + //case T_ResultCacheState: + // /* these nodes have DSM state, but no reinitialization is required */ + // break; default: break; @@ -1068,9 +1068,9 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate, case T_AggState: ExecAggRetrieveInstrumentation((AggState *) planstate); break; - case 
T_ResultCacheState: - ExecResultCacheRetrieveInstrumentation((ResultCacheState *) planstate); - break; + //case T_ResultCacheState: + // ExecResultCacheRetrieveInstrumentation((ResultCacheState *) planstate); + // break; default: break; } @@ -1363,11 +1363,11 @@ ExecParallelInitializeWorker(PlanState *planstate, ParallelWorkerContext *pwcxt) /* even when not parallel-aware, for EXPLAIN ANALYZE */ ExecAggInitializeWorker((AggState *) planstate, pwcxt); break; - case T_ResultCacheState: - /* even when not parallel-aware, for EXPLAIN ANALYZE */ - ExecResultCacheInitializeWorker((ResultCacheState *) planstate, - pwcxt); - break; + //case T_ResultCacheState: + // /* even when not parallel-aware, for EXPLAIN ANALYZE */ + // ExecResultCacheInitializeWorker((ResultCacheState *) planstate, + // pwcxt); + // break; default: break; } diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index fbbe667cc1..e5b8c74da7 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -102,7 +102,7 @@ #include "executor/nodeProjectSet.h" #include "executor/nodeRecursiveunion.h" #include "executor/nodeResult.h" -#include "executor/nodeResultCache.h" +//#include "executor/nodeResultCache.h" #include "executor/nodeSamplescan.h" #include "executor/nodeSeqscan.h" #include "executor/nodeSetOp.h" @@ -320,10 +320,10 @@ ExecInitNode(Plan *node, EState *estate, int eflags) estate, eflags); break; - case T_ResultCache: - result = (PlanState *) ExecInitResultCache((ResultCache *) node, - estate, eflags); - break; + //case T_ResultCache: + // result = (PlanState *) ExecInitResultCache((ResultCache *) node, + // estate, eflags); + // break; case T_Group: result = (PlanState *) ExecInitGroup((Group *) node, @@ -709,9 +709,9 @@ ExecEndNode(PlanState *node) ExecEndIncrementalSort((IncrementalSortState *) node); break; - case T_ResultCacheState: - ExecEndResultCache((ResultCacheState *) node); - break; + //case T_ResultCacheState: + // ExecEndResultCache((ResultCacheState *) node); + // break; case T_GroupState: ExecEndGroup((GroupState *) node); diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c index b07c2996d4..97213071d5 100644 --- a/src/backend/executor/nodeNestloop.c +++ b/src/backend/executor/nodeNestloop.c @@ -23,9 +23,21 @@ #include "executor/execdebug.h" #include "executor/nodeNestloop.h" +#include "executor/nodeResultCache.h" #include "miscadmin.h" #include "utils/memutils.h" +static inline TupleTableSlot * +FetchInnerTuple(ResultCacheState *rcstate, PlanState *innerPlan) +{ + /* No caching? 
Just exec the inner node */ + if (rcstate == NULL) + return ExecProcNode(innerPlan); + /* Otherwise let the cache deal with it */ + else + return ExecResultCache(rcstate, innerPlan); +} + /* ---------------------------------------------------------------- * ExecNestLoop(node) @@ -150,6 +162,11 @@ ExecNestLoop(PlanState *pstate) */ ENL1_printf("rescanning inner plan"); ExecReScan(innerPlan); + + /* When using a result cache, reset the state ready for another lookup */ + if (node->nl_pcache) + ExecResultCacheFinishScan(node->nl_pcache); + } /* @@ -157,7 +174,7 @@ ExecNestLoop(PlanState *pstate) */ ENL1_printf("getting new inner tuple"); - innerTupleSlot = ExecProcNode(innerPlan); + innerTupleSlot = FetchInnerTuple(node->nl_pcache, innerPlan); econtext->ecxt_innertuple = innerTupleSlot; if (TupIsNull(innerTupleSlot)) @@ -345,6 +362,13 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) */ nlstate->nl_NeedNewOuter = true; nlstate->nl_MatchedOuter = false; + nlstate->nl_ParamCache = node->paramcache; + + /* Setup the result cache if enabled */ + if (nlstate->nl_ParamCache) + nlstate->nl_pcache = ExecInitResultCache(node, (PlanState *) nlstate, (PlanState *) innerPlanState(nlstate)); + else + nlstate->nl_pcache = NULL; NL1_printf("ExecInitNestLoop: %s\n", "node initialized"); @@ -352,6 +376,7 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) return nlstate; } + /* ---------------------------------------------------------------- * ExecEndNestLoop * @@ -380,6 +405,9 @@ ExecEndNestLoop(NestLoopState *node) ExecEndNode(outerPlanState(node)); ExecEndNode(innerPlanState(node)); + if (node->nl_pcache) + ExecEndResultCache(node->nl_pcache); + NL1_printf("ExecEndNestLoop: %s\n", "node processing ended"); } diff --git a/src/backend/executor/nodeResultCache.c b/src/backend/executor/nodeResultCache.c index 09b25ea184..da5edf9c06 100644 --- a/src/backend/executor/nodeResultCache.c +++ b/src/backend/executor/nodeResultCache.c @@ -66,7 +66,6 @@ * subplan without caching anything */ #define RC_END_OF_SCAN 5 /* Ready for rescan */ - /* Helper macros for memory accounting */ #define EMPTY_ENTRY_MEMORY_BYTES(e) (sizeof(ResultCacheEntry) + \ sizeof(ResultCacheKey) + \ @@ -179,7 +178,7 @@ ResultCacheHash_equal(struct resultcache_hash *tb, const ResultCacheKey *key1, const ResultCacheKey *key2) { ResultCacheState *rcstate = (ResultCacheState *) tb->private_data; - ExprContext *econtext = rcstate->ss.ps.ps_ExprContext; + ExprContext *econtext = rcstate->ps_ExprContext; TupleTableSlot *tslot = rcstate->tableslot; TupleTableSlot *pslot = rcstate->probeslot; @@ -223,7 +222,7 @@ prepare_probe_slot(ResultCacheState *rcstate, ResultCacheKey *key) /* Set the probeslot's values based on the current parameter values */ for (int i = 0; i < numKeys; i++) pslot->tts_values[i] = ExecEvalExpr(rcstate->param_exprs[i], - rcstate->ss.ps.ps_ExprContext, + rcstate->ps_ExprContext, &pslot->tts_isnull[i]); } else @@ -243,7 +242,7 @@ prepare_probe_slot(ResultCacheState *rcstate, ResultCacheKey *key) * Remove all tuples from a cache entry, leaving an empty cache entry. * Also update memory accounting to reflect the removal of the tuples. 
*/ -static inline void +static void entry_purge_tuples(ResultCacheState *rcstate, ResultCacheEntry *entry) { ResultCacheTuple *tuple = entry->tuplehead; @@ -590,21 +589,32 @@ cache_store_tuple(ResultCacheState *rcstate, TupleTableSlot *slot) return true; } -static TupleTableSlot * -ExecResultCache(PlanState *pstate) +/* + * Caller to call this after it finishes a parameterized scan + */ +void +ExecResultCacheFinishScan(ResultCacheState *rcstate) +{ + rcstate->rc_status = RC_CACHE_LOOKUP; + + /* nullify pointers used for the last scan */ + rcstate->entry = NULL; + rcstate->last_tuple = NULL; +} + +TupleTableSlot * +ExecResultCache(ResultCacheState *rcstate, PlanState *innerPlan) { - ResultCacheState *node = castNode(ResultCacheState, pstate); - PlanState *outerNode; TupleTableSlot *slot; - switch (node->rc_status) + switch (rcstate->rc_status) { case RC_CACHE_LOOKUP: { ResultCacheEntry *entry; bool found; - Assert(node->entry == NULL); + Assert(rcstate->entry == NULL); /* * We're only ever in this state for the first call of the @@ -619,44 +629,43 @@ ExecResultCache(PlanState *pstate) * one there, we'll try to cache it. */ - /* see if we've got anything cached for the current parameters */ - entry = cache_lookup(node, &found); + /* see if we've got anything cached for the current parameters */ + entry = cache_lookup(rcstate, &found); if (found && entry->complete) { - node->stats.cache_hits += 1; /* stats update */ + rcstate->stats.cache_hits += 1; /* stats update */ /* * Set last_tuple and entry so that the state * RC_CACHE_FETCH_NEXT_TUPLE can easily find the next * tuple for these parameters. */ - node->last_tuple = entry->tuplehead; - node->entry = entry; + rcstate->last_tuple = entry->tuplehead; + rcstate->entry = entry; /* Fetch the first cached tuple, if there is one */ if (entry->tuplehead) { - node->rc_status = RC_CACHE_FETCH_NEXT_TUPLE; + rcstate->rc_status = RC_CACHE_FETCH_NEXT_TUPLE; - slot = node->ss.ps.ps_ResultTupleSlot; - ExecStoreMinimalTuple(entry->tuplehead->mintuple, - slot, false); - - return slot; + ExecClearTuple(rcstate->cachefoundslot); + slot = rcstate->cachefoundslotmin; + ExecStoreMinimalTuple(rcstate->last_tuple->mintuple, slot, false); + return ExecCopySlot(rcstate->cachefoundslot, slot); } else { /* The cache entry is void of any tuples. */ - node->rc_status = RC_END_OF_SCAN; + rcstate->rc_status = RC_END_OF_SCAN; return NULL; } } else { - TupleTableSlot *outerslot; + TupleTableSlot *innerslot; - node->stats.cache_misses += 1; /* stats update */ + rcstate->stats.cache_misses += 1; /* stats update */ if (found) { @@ -668,13 +677,12 @@ ExecResultCache(PlanState *pstate) * guarantee the outer node will produce the tuples in * the same order as it did last time. */ - entry_purge_tuples(node, entry); + entry_purge_tuples(rcstate, entry); } /* Scan the outer node for a tuple to cache */ - outerNode = outerPlanState(node); - outerslot = ExecProcNode(outerNode); - if (TupIsNull(outerslot)) + innerslot = ExecProcNode(innerPlan); + if (TupIsNull(innerslot)) { /* * cache_lookup may have returned NULL due to failure @@ -686,22 +694,22 @@ ExecResultCache(PlanState *pstate) if (likely(entry)) entry->complete = true; - node->rc_status = RC_END_OF_SCAN; + rcstate->rc_status = RC_END_OF_SCAN; return NULL; } - node->entry = entry; + rcstate->entry = entry; /* * If we failed to create the entry or failed to store the * tuple in the entry, then go into bypass mode. 
*/ if (unlikely(entry == NULL || - !cache_store_tuple(node, outerslot))) + !cache_store_tuple(rcstate, innerslot))) { - node->stats.cache_overflows += 1; /* stats update */ + rcstate->stats.cache_overflows += 1; /* stats update */ - node->rc_status = RC_CACHE_BYPASS_MODE; + rcstate->rc_status = RC_CACHE_BYPASS_MODE; /* * No need to clear out last_tuple as we'll stay in @@ -716,43 +724,41 @@ ExecResultCache(PlanState *pstate) * allows cache lookups to work even when the scan has * not been executed to completion. */ - entry->complete = node->singlerow; - node->rc_status = RC_FILLING_CACHE; + entry->complete = rcstate->singlerow; + rcstate->rc_status = RC_FILLING_CACHE; } - slot = node->ss.ps.ps_ResultTupleSlot; - ExecCopySlot(slot, outerslot); - return slot; + return innerslot; } } case RC_CACHE_FETCH_NEXT_TUPLE: { /* We shouldn't be in this state if these are not set */ - Assert(node->entry != NULL); - Assert(node->last_tuple != NULL); + Assert(rcstate->entry != NULL); + Assert(rcstate->last_tuple != NULL); /* Skip to the next tuple to output */ - node->last_tuple = node->last_tuple->next; + rcstate->last_tuple = rcstate->last_tuple->next; /* No more tuples in the cache */ - if (node->last_tuple == NULL) + if (rcstate->last_tuple == NULL) { - node->rc_status = RC_END_OF_SCAN; + rcstate->rc_status = RC_END_OF_SCAN; return NULL; } - slot = node->ss.ps.ps_ResultTupleSlot; - ExecStoreMinimalTuple(node->last_tuple->mintuple, slot, - false); + ExecClearTuple(rcstate->cachefoundslot); + slot = rcstate->cachefoundslotmin; + ExecStoreMinimalTuple(rcstate->last_tuple->mintuple, slot, false); - return slot; + return ExecCopySlot(rcstate->cachefoundslot, slot); } case RC_FILLING_CACHE: { - TupleTableSlot *outerslot; - ResultCacheEntry *entry = node->entry; + TupleTableSlot *innerslot; + ResultCacheEntry *entry = rcstate->entry; /* entry should already have been set by RC_CACHE_LOOKUP */ Assert(entry != NULL); @@ -762,13 +768,12 @@ ExecResultCache(PlanState *pstate) * miss and are populating the cache with the current scan * tuples. */ - outerNode = outerPlanState(node); - outerslot = ExecProcNode(outerNode); - if (TupIsNull(outerslot)) + innerslot = ExecProcNode(innerPlan); + if (TupIsNull(innerslot)) { /* No more tuples. Mark it as complete */ entry->complete = true; - node->rc_status = RC_END_OF_SCAN; + rcstate->rc_status = RC_END_OF_SCAN; return NULL; } else @@ -782,12 +787,12 @@ ExecResultCache(PlanState *pstate) elog(ERROR, "cache entry already complete"); /* Record the tuple in the current cache entry */ - if (unlikely(!cache_store_tuple(node, outerslot))) + if (unlikely(!cache_store_tuple(rcstate, innerslot))) { /* Couldn't store it? Handle overflow */ - node->stats.cache_overflows += 1; /* stats update */ + rcstate->stats.cache_overflows += 1; /* stats update */ - node->rc_status = RC_CACHE_BYPASS_MODE; + rcstate->rc_status = RC_CACHE_BYPASS_MODE; /* * No need to clear out entry or last_tuple as we'll @@ -795,32 +800,27 @@ ExecResultCache(PlanState *pstate) */ } - slot = node->ss.ps.ps_ResultTupleSlot; - ExecCopySlot(slot, outerslot); - return slot; + return innerslot; } } case RC_CACHE_BYPASS_MODE: { - TupleTableSlot *outerslot; + TupleTableSlot *innerslot; /* * When in bypass mode we just continue to read tuples without * caching. We need to wait until the next rescan before we * can come out of this mode. 
*/ - outerNode = outerPlanState(node); - outerslot = ExecProcNode(outerNode); - if (TupIsNull(outerslot)) + innerslot = ExecProcNode(innerPlan); + if (TupIsNull(innerslot)) { - node->rc_status = RC_END_OF_SCAN; + rcstate->rc_status = RC_END_OF_SCAN; return NULL; } - slot = node->ss.ps.ps_ResultTupleSlot; - ExecCopySlot(slot, outerslot); - return slot; + return innerslot; } case RC_END_OF_SCAN: @@ -833,60 +833,34 @@ ExecResultCache(PlanState *pstate) default: elog(ERROR, "unrecognized resultcache state: %d", - (int) node->rc_status); + (int) rcstate->rc_status); return NULL; } /* switch */ } ResultCacheState * -ExecInitResultCache(ResultCache *node, EState *estate, int eflags) +ExecInitResultCache(NestLoop *node, PlanState *planstate, PlanState *cache_planstate) { ResultCacheState *rcstate = makeNode(ResultCacheState); - Plan *outerNode; int i; int nkeys; Oid *eqfuncoids; - /* check for unsupported flags */ - Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); - - rcstate->ss.ps.plan = (Plan *) node; - rcstate->ss.ps.state = estate; - rcstate->ss.ps.ExecProcNode = ExecResultCache; - - /* - * Miscellaneous initialization - * - * create expression context for node - */ - ExecAssignExprContext(estate, &rcstate->ss.ps); - - outerNode = outerPlan(node); - outerPlanState(rcstate) = ExecInitNode(outerNode, estate, eflags); - - /* - * Initialize return slot and type. No need to initialize projection info - * because this node doesn't do projections. - */ - ExecInitResultTupleSlotTL(&rcstate->ss.ps, &TTSOpsMinimalTuple); - rcstate->ss.ps.ps_ProjInfo = NULL; - - /* - * Initialize scan slot and type. - */ - ExecCreateScanSlotFromOuterPlan(estate, &rcstate->ss, &TTSOpsMinimalTuple); - - /* - * Set the state machine to lookup the cache. We won't find anything - * until we cache something, but this saves a special case to create the - * first entry. - */ + rcstate->ps_ExprContext = CreateExprContext(planstate->state); rcstate->rc_status = RC_CACHE_LOOKUP; rcstate->nkeys = nkeys = node->numKeys; rcstate->hashkeydesc = ExecTypeFromExprList(node->param_exprs); rcstate->tableslot = MakeSingleTupleTableSlot(rcstate->hashkeydesc, &TTSOpsMinimalTuple); + /* XXX this should make a slot the same type as cache_planstates result slot. For now + * that'll always be a nested loop, so just make a virtual slot, which is what nested loop + * uses. 
+ */ + rcstate->cachefoundslot = MakeSingleTupleTableSlot(cache_planstate->ps_ResultTupleDesc, + &TTSOpsVirtual); + rcstate->cachefoundslotmin = MakeSingleTupleTableSlot(cache_planstate->ps_ResultTupleDesc, + &TTSOpsMinimalTuple); rcstate->probeslot = MakeSingleTupleTableSlot(rcstate->hashkeydesc, &TTSOpsVirtual); @@ -910,7 +884,7 @@ ExecInitResultCache(ResultCache *node, EState *estate, int eflags) fmgr_info(left_hashfn, &rcstate->hashfunctions[i]); - rcstate->param_exprs[i] = ExecInitExpr(param_expr, (PlanState *) rcstate); + rcstate->param_exprs[i] = ExecInitExpr(param_expr, (PlanState *)planstate); eqfuncoids[i] = get_opcode(hashop); } @@ -919,7 +893,7 @@ ExecInitResultCache(ResultCache *node, EState *estate, int eflags) eqfuncoids, node->collations, node->param_exprs, - (PlanState *) rcstate); + (PlanState *) planstate); pfree(eqfuncoids); rcstate->mem_used = 0; @@ -970,57 +944,12 @@ ExecInitResultCache(ResultCache *node, EState *estate, int eflags) void ExecEndResultCache(ResultCacheState *node) { - /* - * When ending a parallel worker, copy the statistics gathered by the - * worker back into shared memory so that it can be picked up by the main - * process to report in EXPLAIN ANALYZE. - */ - if (node->shared_info && IsParallelWorker()) - { - ResultCacheInstrumentation *si; - - Assert(ParallelWorkerNumber <= node->shared_info->num_workers); - si = &node->shared_info->sinstrument[ParallelWorkerNumber]; - memcpy(si, &node->stats, sizeof(ResultCacheInstrumentation)); - } - /* Remove the cache context */ MemoryContextDelete(node->tableContext); - ExecClearTuple(node->ss.ss_ScanTupleSlot); - /* must drop pointer to cache result tuple */ - ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); - - /* - * free exprcontext - */ - ExecFreeExprContext(&node->ss.ps); - - /* - * shut down the subplan - */ - ExecEndNode(outerPlanState(node)); -} - -void -ExecReScanResultCache(ResultCacheState *node) -{ - PlanState *outerPlan = outerPlanState(node); - - /* Mark that we must lookup the cache for a new set of parameters */ - node->rc_status = RC_CACHE_LOOKUP; - - /* nullify pointers used for the last scan */ - node->entry = NULL; - node->last_tuple = NULL; - - /* - * if chgParam of subnode is not null then plan will be re-scanned by - * first ExecProcNode. - */ - if (outerPlan->chgParam == NULL) - ExecReScan(outerPlan); - + ExecClearTuple(node->cachefoundslot); + ExecClearTuple(node->cachefoundslotmin); + FreeExprContext(node->ps_ExprContext, false); } /* @@ -1035,88 +964,3 @@ ExecEstimateCacheEntryOverheadBytes(double ntuples) sizeof(ResultCacheTuple) * ntuples; } -/* ---------------------------------------------------------------- - * Parallel Query Support - * ---------------------------------------------------------------- - */ - - /* ---------------------------------------------------------------- - * ExecResultCacheEstimate - * - * Estimate space required to propagate result cache statistics. 
- * ---------------------------------------------------------------- - */ -void -ExecResultCacheEstimate(ResultCacheState *node, ParallelContext *pcxt) -{ - Size size; - - /* don't need this if not instrumenting or no workers */ - if (!node->ss.ps.instrument || pcxt->nworkers == 0) - return; - - size = mul_size(pcxt->nworkers, sizeof(ResultCacheInstrumentation)); - size = add_size(size, offsetof(SharedResultCacheInfo, sinstrument)); - shm_toc_estimate_chunk(&pcxt->estimator, size); - shm_toc_estimate_keys(&pcxt->estimator, 1); -} - -/* ---------------------------------------------------------------- - * ExecResultCacheInitializeDSM - * - * Initialize DSM space for result cache statistics. - * ---------------------------------------------------------------- - */ -void -ExecResultCacheInitializeDSM(ResultCacheState *node, ParallelContext *pcxt) -{ - Size size; - - /* don't need this if not instrumenting or no workers */ - if (!node->ss.ps.instrument || pcxt->nworkers == 0) - return; - - size = offsetof(SharedResultCacheInfo, sinstrument) - + pcxt->nworkers * sizeof(ResultCacheInstrumentation); - node->shared_info = shm_toc_allocate(pcxt->toc, size); - /* ensure any unfilled slots will contain zeroes */ - memset(node->shared_info, 0, size); - node->shared_info->num_workers = pcxt->nworkers; - shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, - node->shared_info); -} - -/* ---------------------------------------------------------------- - * ExecResultCacheInitializeWorker - * - * Attach worker to DSM space for result cache statistics. - * ---------------------------------------------------------------- - */ -void -ExecResultCacheInitializeWorker(ResultCacheState *node, ParallelWorkerContext *pwcxt) -{ - node->shared_info = - shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, true); -} - -/* ---------------------------------------------------------------- - * ExecResultCacheRetrieveInstrumentation - * - * Transfer result cache statistics from DSM to private memory. - * ---------------------------------------------------------------- - */ -void -ExecResultCacheRetrieveInstrumentation(ResultCacheState *node) -{ - Size size; - SharedResultCacheInfo *si; - - if (node->shared_info == NULL) - return; - - size = offsetof(SharedResultCacheInfo, sinstrument) - + node->shared_info->num_workers * sizeof(ResultCacheInstrumentation); - si = palloc(size); - memcpy(si, node->shared_info, size); - node->shared_info = si; -} diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index e50844df9b..0101d719c4 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2298,148 +2298,6 @@ cost_material(Path *path, path->total_cost = startup_cost + run_cost; } -/* - * cost_resultcache_rescan - * Determines the estimated cost of rescanning a ResultCache node. - * - * In order to estimate this, we must gain knowledge of how often we expect to - * be called and how many distinct sets of parameters we are likely to be - * called with. If we expect a good cache hit ratio, then we can set our - * costs to account for that hit ratio, plus a little bit of cost for the - * caching itself. Caching will not work out well if we expect to be called - * with too many distinct parameter values. The worst-case here is that we - * never see the same parameter values twice, in which case we'd never get a - * cache hit and caching would be a complete waste of effort. 
- */ -static void -cost_resultcache_rescan(PlannerInfo *root, ResultCachePath *rcpath, - Cost *rescan_startup_cost, Cost *rescan_total_cost) -{ - Cost input_startup_cost = rcpath->subpath->startup_cost; - Cost input_total_cost = rcpath->subpath->total_cost; - double tuples = rcpath->subpath->rows; - double calls = rcpath->calls; - int width = rcpath->subpath->pathtarget->width; - int flags; - - double work_mem_bytes; - double est_entry_bytes; - double est_cache_entries; - double ndistinct; - double evict_ratio; - double hit_ratio; - Cost startup_cost; - Cost total_cost; - - /* available cache space */ - work_mem_bytes = work_mem * 1024L; - - /* - * Set the number of bytes each cache entry should consume in the cache. - * To provide us with better estimations on how many cache entries we can - * store at once we make a call to the excutor here to ask it what memory - * overheads there are for a single cache entry. - * - * XXX we also store the cache key, but that's not accounted for here. - */ - est_entry_bytes = relation_byte_size(tuples, width) + - ExecEstimateCacheEntryOverheadBytes(tuples); - - /* estimate on the upper limit of cache entries we can hold at once */ - est_cache_entries = floor(work_mem_bytes / est_entry_bytes); - - /* estimate on the distinct number of parameter values */ - ndistinct = estimate_num_groups(root, rcpath->param_exprs, calls, NULL, - &flags); - - /* - * When the estimation fell back on using a default value, it's a bit too - * risky to assume that it's ok to use a Result Cache. The use of a - * default could cause us to use a Result Cache when it's really - * inappropriate to do so. If we see that this has been done then we'll - * assume that every call will have unique parameters, which will almost - * certainly mean a ResultCachePath will never survive add_path(). - */ - if ((flags & SELFLAG_USED_DEFAULT) != 0) - ndistinct = calls; - - /* - * Since we've already estimated the maximum number of entries we can - * store at once and know the estimated number of distinct values we'll be - * called with, well take this opportunity to set the path's est_entries. - * This will ultimately determine the hash table size that the executor - * will use. If we leave this at zero the executor will just choose the - * size itself. Really this is not the right place to do this, but it's - * convenient since everything is already calculated. - */ - rcpath->est_entries = Min(Min(ndistinct, est_cache_entries), - PG_UINT32_MAX); - - - /* - * When the number of distinct parameter values is above the amount we can - * store in the cache, then we'll have to evict some entries from the - * cache. This is not free, so here we estimate how often we'll incur the - * cost of that eviction. - */ - evict_ratio = 1.0 - Min(est_cache_entries, ndistinct) / ndistinct; - - /* - * In order to estimate how costly a single scan will be, we need to - * attempt to estimate what the cache hit ratio will be. To do that we - * must look at how many scans are estimated in total of this node and how - * many of those scans we expect to get a cache hit. - */ - hit_ratio = 1.0 / ndistinct * Min(est_cache_entries, ndistinct) - - (ndistinct / calls); - - /* Ensure we don't go negative */ - hit_ratio = Max(hit_ratio, 0); - - /* - * Set the total_cost accounting for the expected cache hit ratio. We - * also add on a cpu_operator_cost to account for a cache lookup. This - * will happen regardless of if it's a cache hit or not. 
- */ - total_cost = input_total_cost * (1.0 - hit_ratio) + cpu_operator_cost; - - /* Now adjust the total cost to account for cache evictions */ - - /* Charge a cpu_tuple_cost for evicting the actual cache entry */ - total_cost += cpu_tuple_cost * evict_ratio; - - /* - * Charge a 10th of cpu_operator_cost to evict every tuple in that entry. - * The per-tuple eviction is really just a pfree, so charging a whole - * cpu_operator_cost seems a little excessive. - */ - total_cost += cpu_operator_cost / 10.0 * evict_ratio * tuples; - - /* - * Now adjust for storing things in the cache, since that's not free - * either. Everything must go in the cache, so we don't proportion this - * over any ratio, just apply it once for the scan. We charge a - * cpu_tuple_cost for the creation of the cache entry and also a - * cpu_operator_cost for each tuple we expect to cache. - */ - total_cost += cpu_tuple_cost + cpu_operator_cost * tuples; - - /* - * Getting the first row must be also be proportioned according to the - * expected cache hit ratio. - */ - startup_cost = input_startup_cost * (1.0 - hit_ratio); - - /* - * Additionally we charge a cpu_tuple_cost to account for cache lookups, - * which we'll do regardless of if it was a cache hit or not. - */ - startup_cost += cpu_tuple_cost; - - *rescan_startup_cost = startup_cost; - *rescan_total_cost = total_cost; -} - /* * cost_agg * Determines and returns the cost of performing an Agg plan node, @@ -4167,11 +4025,6 @@ cost_rescan(PlannerInfo *root, Path *path, *rescan_total_cost = run_cost; } break; - case T_ResultCache: - /* All the hard work is done by cost_resultcache_rescan */ - cost_resultcache_rescan(root, (ResultCachePath *) path, - rescan_startup_cost, rescan_total_cost); - break; default: *rescan_startup_cost = path->startup_cost; *rescan_total_cost = path->total_cost; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index f4c76577ad..5918dd9a3a 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -17,13 +17,16 @@ #include #include "executor/executor.h" +#include "executor/nodeResultCache.h" #include "foreign/fdwapi.h" +#include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" +#include "utils/selfuncs.h" #include "utils/typcache.h" /* Hook for plugins to get control in add_paths_to_joinrel() */ @@ -554,6 +557,152 @@ get_resultcache_path(PlannerInfo *root, RelOptInfo *innerrel, return NULL; } +static double +relation_byte_size(double tuples, int width) +{ + return tuples * (MAXALIGN(width) + MAXALIGN(SizeofHeapTupleHeader)); +} + +/* + * cost_resultcache_rescan + * Determines the estimated cost of rescanning a ResultCache node. + * + * In order to estimate this, we must gain knowledge of how often we expect to + * be called and how many distinct sets of parameters we are likely to be + * called with. If we expect a good cache hit ratio, then we can set our + * costs to account for that hit ratio, plus a little bit of cost for the + * caching itself. Caching will not work out well if we expect to be called + * with too many distinct parameter values. The worst-case here is that we + * never see the same parameter values twice, in which case we'd never get a + * cache hit and caching would be a complete waste of effort. 
+ */ +static bool +use_nestedloop_cache(PlannerInfo *root, NestPath *nlpath) +{ + Cost input_startup_cost = nlpath->innerjoinpath->startup_cost; + Cost input_total_cost = nlpath->innerjoinpath->total_cost; + double tuples = nlpath->innerjoinpath->rows; + double calls = nlpath->outerjoinpath->rows; + int width = nlpath->innerjoinpath->pathtarget->width; + int flags; + + double work_mem_bytes; + double est_entry_bytes; + double est_cache_entries; + double ndistinct; + double evict_ratio; + double hit_ratio; + Cost startup_cost; + Cost total_cost; + + /* available cache space */ + work_mem_bytes = work_mem * 1024L; + + /* + * Set the number of bytes each cache entry should consume in the cache. + * To provide us with better estimations on how many cache entries we can + * store at once we make a call to the excutor here to ask it what memory + * overheads there are for a single cache entry. + * + * XXX we also store the cache key, but that's not accounted for here. + */ + est_entry_bytes = relation_byte_size(tuples, width) + + ExecEstimateCacheEntryOverheadBytes(tuples); + + /* estimate on the upper limit of cache entries we can hold at once */ + est_cache_entries = floor(work_mem_bytes / est_entry_bytes); + + /* estimate on the distinct number of parameter values */ + ndistinct = 1; // estimate_num_groups(root, nlpath->rcpath->param_exprs, calls, NULL, + //&flags); + + /* + * When the estimation fell back on using a default value, it's a bit too + * risky to assume that it's ok to use a Result Cache. The use of a + * default could cause us to use a Result Cache when it's really + * inappropriate to do so. If we see that this has been done then we'll + * assume that every call will have unique parameters, which will almost + * certainly mean a ResultCachePath will never survive add_path(). + */ + if ((flags & SELFLAG_USED_DEFAULT) != 0) + ndistinct = calls; + + /* + * Since we've already estimated the maximum number of entries we can + * store at once and know the estimated number of distinct values we'll be + * called with, well take this opportunity to set the path's est_entries. + * This will ultimately determine the hash table size that the executor + * will use. If we leave this at zero the executor will just choose the + * size itself. Really this is not the right place to do this, but it's + * convenient since everything is already calculated. + */ + //nlpath->est_entries = Min(Min(ndistinct, est_cache_entries), + // PG_UINT32_MAX); + + + /* + * When the number of distinct parameter values is above the amount we can + * store in the cache, then we'll have to evict some entries from the + * cache. This is not free, so here we estimate how often we'll incur the + * cost of that eviction. + */ + evict_ratio = 1.0 - Min(est_cache_entries, ndistinct) / ndistinct; + + /* + * In order to estimate how costly a single scan will be, we need to + * attempt to estimate what the cache hit ratio will be. To do that we + * must look at how many scans are estimated in total of this node and how + * many of those scans we expect to get a cache hit. + */ + hit_ratio = 1.0 / ndistinct * Min(est_cache_entries, ndistinct) - + (ndistinct / calls); + + /* Ensure we don't go negative */ + hit_ratio = Max(hit_ratio, 0); + + /* + * Set the total_cost accounting for the expected cache hit ratio. We + * also add on a cpu_operator_cost to account for a cache lookup. This + * will happen regardless of if it's a cache hit or not. 
+ */ + total_cost = input_total_cost * (1.0 - hit_ratio) + cpu_operator_cost; + + /* Now adjust the total cost to account for cache evictions */ + + /* Charge a cpu_tuple_cost for evicting the actual cache entry */ + total_cost += cpu_tuple_cost * evict_ratio; + + /* + * Charge a 10th of cpu_operator_cost to evict every tuple in that entry. + * The per-tuple eviction is really just a pfree, so charging a whole + * cpu_operator_cost seems a little excessive. + */ + total_cost += cpu_operator_cost / 10.0 * evict_ratio * tuples; + + /* + * Now adjust for storing things in the cache, since that's not free + * either. Everything must go in the cache, so we don't proportion this + * over any ratio, just apply it once for the scan. We charge a + * cpu_tuple_cost for the creation of the cache entry and also a + * cpu_operator_cost for each tuple we expect to cache. + */ + total_cost += cpu_tuple_cost + cpu_operator_cost * tuples; + + /* + * Getting the first row must be also be proportioned according to the + * expected cache hit ratio. + */ + startup_cost = input_startup_cost * (1.0 - hit_ratio); + + /* + * Additionally we charge a cpu_tuple_cost to account for cache lookups, + * which we'll do regardless of if it was a cache hit or not. + */ + startup_cost += cpu_tuple_cost; + + return total_cost < nlpath->innerjoinpath->total_cost; +} + /* * try_nestloop_path * Consider a nestloop join path; if it appears useful, push it into @@ -576,8 +725,7 @@ try_nestloop_path(PlannerInfo *root, Relids outerrelids; Relids inner_paramrels = PATH_REQ_OUTER(inner_path); Relids outer_paramrels = PATH_REQ_OUTER(outer_path); - Path *inner_cache_path; - bool added_path = false; + ResultCachePath *rcpath; /* * Paths are parameterized by top-level parents, so run parameterization @@ -628,6 +776,7 @@ try_nestloop_path(PlannerInfo *root, workspace.startup_cost, workspace.total_cost, pathkeys, required_outer)) { + NestPath *nlpath; /* * If the inner path is parameterized, it is parameterized by the * topmost parent of the outer rel, not the outer rel itself. Fix @@ -649,103 +798,37 @@ try_nestloop_path(PlannerInfo *root, } } - add_path(joinrel, (Path *) - create_nestloop_path(root, - joinrel, - jointype, - &workspace, - extra, - outer_path, - inner_path, - extra->restrictlist, - pathkeys, - required_outer)); - added_path = true; - } - - /* - * See if we can build a result cache path for this inner_path. That might - * make the nested loop cheaper. - */ - inner_cache_path = get_resultcache_path(root, innerrel, outerrel, - inner_path, outer_path, jointype, - extra); - - if (inner_cache_path == NULL) - { - if (!added_path) - bms_free(required_outer); - return; - } - - initial_cost_nestloop(root, &workspace, jointype, - outer_path, inner_cache_path, extra); - - if (add_path_precheck(joinrel, - workspace.startup_cost, workspace.total_cost, - pathkeys, required_outer)) - { /* - * If the inner path is parameterized, it is parameterized by the - * topmost parent of the outer rel, not the outer rel itself. Fix - * that. + * See if we can build a result cache path for this inner_path. That might + * make the nested loop cheaper. 
*/ - if (PATH_PARAM_BY_PARENT(inner_cache_path, outer_path->parent)) + rcpath = (ResultCachePath *) get_resultcache_path(root, innerrel, outerrel, + inner_path, outer_path, jointype, + extra); + + nlpath = create_nestloop_path(root, + joinrel, + jointype, + &workspace, + extra, + outer_path, + inner_path, + extra->restrictlist, + pathkeys, + required_outer); + + if (rcpath != NULL) { - Path *reparameterize_path; - - reparameterize_path = reparameterize_path_by_child(root, - inner_cache_path, - outer_path->parent); - - /* - * If we could not translate the path, we can't create nest loop - * path. - */ - if (!reparameterize_path) - { - ResultCachePath *rcpath = (ResultCachePath *) inner_cache_path; - - /* Waste no memory when we reject a path here */ - list_free(rcpath->hash_operators); - list_free(rcpath->param_exprs); - pfree(rcpath); - - if (!added_path) - bms_free(required_outer); - return; - } + nlpath->use_cache = true; + nlpath->hash_operators = rcpath->hash_operators; + nlpath->param_exprs = rcpath->param_exprs; + nlpath->singlerow = rcpath->singlerow; + nlpath->calls = rcpath->calls; + nlpath->est_entries = rcpath->est_entries; } - add_path(joinrel, (Path *) - create_nestloop_path(root, - joinrel, - jointype, - &workspace, - extra, - outer_path, - inner_cache_path, - extra->restrictlist, - pathkeys, - required_outer)); - added_path = true; + add_path(joinrel, (Path *)nlpath); } - else - { - ResultCachePath *rcpath = (ResultCachePath *) inner_cache_path; - - /* Waste no memory when we reject a path here */ - list_free(rcpath->hash_operators); - list_free(rcpath->param_exprs); - pfree(rcpath); - } - - if (!added_path) - { - /* Waste no memory when we reject a path here */ - bms_free(required_outer); - } - } /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 45e211262a..7afb7741d0 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -4147,6 +4147,7 @@ create_nestloop_plan(PlannerInfo *root, Relids outerrelids; List *nestParams; Relids saveOuterRels = root->curOuterRels; + List *param_exprs = NIL; /* NestLoop can project, so no need to be picky about child tlists */ outer_plan = create_plan_recurse(root, best_path->outerjoinpath, 0); @@ -4157,6 +4158,9 @@ create_nestloop_plan(PlannerInfo *root, inner_plan = create_plan_recurse(root, best_path->innerjoinpath, 0); + param_exprs = (List *) replace_nestloop_params(root, (Node *) + best_path->param_exprs); + /* Restore curOuterRels */ bms_free(root->curOuterRels); root->curOuterRels = saveOuterRels; @@ -4204,6 +4208,54 @@ create_nestloop_plan(PlannerInfo *root, best_path->jointype, best_path->inner_unique); + //bool paramcache; + //int numKeys; /* size of the two arrays below */ + + //Oid *hashOperators; /* hash operators for each key */ + //Oid *collations; /* cache keys */ + //List *param_exprs; /* exprs containing parameters */ + //bool singlerow; /* true if the cache entry should be marked as + // * complete after we store the first tuple in + // * it. 
*/ + //uint32 est_entries; /* The maximum number of entries that the + // * planner expects will fit in the cache, or 0 + // * if unknown */ + + if (best_path->use_cache) + { + Oid *operators; + Oid *collations; + ListCell *lc; + ListCell *lc2; + int nkeys; + int i; + + join_plan->numKeys = list_length(best_path->param_exprs); + + nkeys = list_length(param_exprs); + Assert(nkeys > 0); + operators = palloc(nkeys * sizeof(Oid)); + collations = palloc(nkeys * sizeof(Oid)); + + i = 0; + forboth(lc, param_exprs, lc2, best_path->hash_operators) + { + Expr *param_expr = (Expr *)lfirst(lc); + Oid opno = lfirst_oid(lc2); + + operators[i] = opno; + collations[i] = exprCollation((Node *)param_expr); + i++; + } + join_plan->paramcache = true; + join_plan->param_exprs = param_exprs; + join_plan->hashOperators = operators; + join_plan->collations = collations; + join_plan->singlerow = best_path->singlerow; + join_plan->est_entries = best_path->est_entries; + + } + copy_generic_path_info(&join_plan->join.plan, &best_path->path); return join_plan; diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 3e2c61b0a0..9da223139a 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -137,74 +137,6 @@ get_first_col_type(Plan *plan, Oid *coltype, int32 *coltypmod, *colcollation = InvalidOid; } - -/* - * outer_params_hashable - * Determine if it's valid to use a ResultCache node to cache already - * seen rows matching a given set of parameters instead of performing a - * rescan of the subplan pointed to by 'subroot'. If it's valid, check - * if all parameters required by this query level can be hashed. If so, - * return true and set 'operators' to the list of hash equality operators - * for the given parameters then populate 'param_exprs' with each - * PARAM_EXEC parameter that the subplan requires the outer query to pass - * it. When hashing is not possible, false is returned and the two - * output lists are unchanged. - */ -static bool -outer_params_hashable(PlannerInfo *subroot, List *plan_params, List **operators, - List **param_exprs) -{ - List *oplist = NIL; - List *exprlist = NIL; - ListCell *lc; - - /* Ensure we're not given a top-level query. */ - Assert(subroot->parent_root != NULL); - - /* - * It's not valid to use a Result Cache node if there are any volatile - * function in the subquery. Caching could cause fewer evaluations of - * volatile functions that have side-effects - */ - if (contain_volatile_functions((Node *) subroot->parse)) - return false; - - foreach(lc, plan_params) - { - PlannerParamItem *ppi = (PlannerParamItem *) lfirst(lc); - TypeCacheEntry *typentry; - Node *expr = ppi->item; - Param *param; - - param = makeNode(Param); - param->paramkind = PARAM_EXEC; - param->paramid = ppi->paramId; - param->paramtype = exprType(expr); - param->paramtypmod = exprTypmod(expr); - param->paramcollid = exprCollation(expr); - param->location = -1; - - typentry = lookup_type_cache(param->paramtype, - TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR); - - /* XXX will eq_opr ever be invalid if hash_proc isn't? */ - if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr)) - { - list_free(oplist); - list_free(exprlist); - return false; - } - - oplist = lappend_oid(oplist, typentry->eq_opr); - exprlist = lappend(exprlist, param); - } - - *operators = oplist; - *param_exprs = exprlist; - - return true; /* all params can be hashed */ -} - /* * Convert a SubLink (as created by the parser) into a SubPlan. 
* @@ -311,30 +243,30 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, * regardless. It may be useful if we can only do this when it seems * likely that we'll get some repeat lookups, i.e. cache hits. */ - if (enable_resultcache && plan_params != NIL && subLinkType == EXPR_SUBLINK) - { - List *operators; - List *param_exprs; - - /* Determine if all the subplan parameters can be hashed */ - if (outer_params_hashable(subroot, plan_params, &operators, ¶m_exprs)) - { - ResultCachePath *cache_path; - - /* - * Pass -1 for the number of calls since we don't have any ideas - * what that'll be. - */ - cache_path = create_resultcache_path(root, - best_path->parent, - best_path, - param_exprs, - operators, - false, - -1); - best_path = (Path *) cache_path; - } - } + //if (enable_resultcache && plan_params != NIL && subLinkType == EXPR_SUBLINK) + //{ + // List *operators; + // List *param_exprs; + + // /* Determine if all the subplan parameters can be hashed */ + // if (outer_params_hashable(subroot, plan_params, &operators, ¶m_exprs)) + // { + // ResultCachePath *cache_path; + + // /* + // * Pass -1 for the number of calls since we don't have any ideas + // * what that'll be. + // */ + // cache_path = create_resultcache_path(root, + // best_path->parent, + // best_path, + // param_exprs, + // operators, + // false, + // -1); + // best_path = (Path *) cache_path; + // } + //} plan = create_plan(subroot, best_path); diff --git a/src/include/executor/nodeResultCache.h b/src/include/executor/nodeResultCache.h index d2f3ed9a74..440019d141 100644 --- a/src/include/executor/nodeResultCache.h +++ b/src/include/executor/nodeResultCache.h @@ -15,16 +15,11 @@ #include "nodes/execnodes.h" -extern ResultCacheState *ExecInitResultCache(ResultCache *node, EState *estate, int eflags); +extern void ExecResultCacheFinishScan(ResultCacheState *rcstate); +extern TupleTableSlot *ExecResultCache(ResultCacheState *rcstate, PlanState *innerPlan); +extern ResultCacheState *ExecInitResultCache(NestLoop *node, PlanState *planstate, PlanState *cache_planstate); extern void ExecEndResultCache(ResultCacheState *node); extern void ExecReScanResultCache(ResultCacheState *node); extern double ExecEstimateCacheEntryOverheadBytes(double ntuples); -extern void ExecResultCacheEstimate(ResultCacheState *node, - ParallelContext *pcxt); -extern void ExecResultCacheInitializeDSM(ResultCacheState *node, - ParallelContext *pcxt); -extern void ExecResultCacheInitializeWorker(ResultCacheState *node, - ParallelWorkerContext *pwcxt); -extern void ExecResultCacheRetrieveInstrumentation(ResultCacheState *node); #endif /* NODERESULTCACHE_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 30f66d5058..a2a70151c9 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1855,12 +1855,15 @@ typedef struct JoinState * NullInnerTupleSlot prepared null tuple for left outer joins * ---------------- */ +struct ResultCacheState; typedef struct NestLoopState { JoinState js; /* its first field is NodeTag */ bool nl_NeedNewOuter; bool nl_MatchedOuter; + bool nl_ParamCache; TupleTableSlot *nl_NullInnerTupleSlot; + struct ResultCacheState *nl_pcache; } NestLoopState; /* ---------------- @@ -2022,12 +2025,15 @@ typedef struct SharedResultCacheInfo */ typedef struct ResultCacheState { - ScanState ss; /* its first field is NodeTag */ + ExprContext *ps_ExprContext; /* node's expression-evaluation context */ + //ScanState ss; /* its first field is NodeTag */ int rc_status; /* value of ExecResultCache's 
state machine */ int nkeys; /* number of hash table keys */ struct resultcache_hash *hashtable; /* hash table cache entries */ TupleDesc hashkeydesc; /* tuple descriptor for hash keys */ TupleTableSlot *tableslot; /* min tuple slot for existing cache entries */ + TupleTableSlot *cachefoundslot; /* Slot to return found cache entries */ + TupleTableSlot *cachefoundslotmin; /* Slot to return found cache entries */ TupleTableSlot *probeslot; /* virtual slot used for hash lookups */ ExprState *cache_eq_expr; /* Compare exec params to hash key */ ExprState **param_exprs; /* exprs containing the parameters to this diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 79a4ad20dd..31b158026c 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1546,6 +1546,16 @@ typedef struct JoinPath List *joinrestrictinfo; /* RestrictInfos to apply to join */ + bool use_cache; + List *hash_operators; /* hash operators for each key */ + List *param_exprs; /* cache keys */ + bool singlerow; /* true if the cache entry is to be marked as + * complete after caching the first record. */ + double calls; /* expected number of rescans */ + uint32 est_entries; /* The maximum number of entries that the + * planner expects will fit in the cache, or 0 + * if unknown */ + /* * See the notes for RelOptInfo and ParamPathInfo to understand why * joinrestrictinfo is needed in JoinPath, and can't be merged into the diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index ac5685da64..f989d31033 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -701,6 +701,18 @@ typedef struct NestLoop { Join join; List *nestParams; /* list of NestLoopParam nodes */ + bool paramcache; + int numKeys; /* size of the two arrays below */ + + Oid *hashOperators; /* hash operators for each key */ + Oid *collations; /* cache keys */ + List *param_exprs; /* exprs containing parameters */ + bool singlerow; /* true if the cache entry should be marked as + * complete after we store the first tuple in + * it. */ + uint32 est_entries; /* The maximum number of entries that the + * planner expects will fit in the cache, or 0 + * if unknown */ } NestLoop; typedef struct NestLoopParam
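
The hit-ratio arithmetic that cost_resultcache_rescan() / use_nestedloop_cache() rely on in the costing hunks above can be distilled into a few lines. The standalone C sketch below is not part of the patch: the function name, the hard-coded default values of cpu_operator_cost and cpu_tuple_cost, and the example figures in main() are illustrative assumptions. It mirrors the patch's arithmetic: the inner path's cost is discounted by the expected cache hit ratio, then charges are added for the hash lookup, for evictions, and for filling the cache. In the patch, caching is judged worthwhile when this figure comes out below the inner path's uncached total_cost.

#include <stdio.h>

#define Min(a, b) ((a) < (b) ? (a) : (b))
#define Max(a, b) ((a) > (b) ? (a) : (b))

/* PostgreSQL's default cost GUC values, hard-coded here for illustration */
static const double cpu_operator_cost = 0.0025;
static const double cpu_tuple_cost = 0.01;

/*
 * Illustrative sketch of the per-rescan total cost of a cached inner side,
 * following the arithmetic in cost_resultcache_rescan()/use_nestedloop_cache().
 */
static double
cached_rescan_total_cost(double input_total_cost,	/* inner path total cost */
						 double tuples,				/* inner path row estimate */
						 double calls,				/* expected number of rescans */
						 double ndistinct,			/* distinct parameter sets */
						 double est_cache_entries)	/* entries fitting in work_mem */
{
	double		evict_ratio;
	double		hit_ratio;
	double		total_cost;

	/* how often we expect to evict an entry to make room for a new one */
	evict_ratio = 1.0 - Min(est_cache_entries, ndistinct) / ndistinct;

	/* expected fraction of rescans that find a complete cached entry */
	hit_ratio = 1.0 / ndistinct * Min(est_cache_entries, ndistinct) -
		(ndistinct / calls);
	hit_ratio = Max(hit_ratio, 0);

	/* cache misses pay the full inner cost; every rescan pays one lookup */
	total_cost = input_total_cost * (1.0 - hit_ratio) + cpu_operator_cost;

	/* evicting an entry, plus a 10th of an operator cost per evicted tuple */
	total_cost += cpu_tuple_cost * evict_ratio;
	total_cost += cpu_operator_cost / 10.0 * evict_ratio * tuples;

	/* building the cache entry and storing each tuple in it */
	total_cost += cpu_tuple_cost + cpu_operator_cost * tuples;

	return total_cost;
}

int
main(void)
{
	/* e.g. inner path costing 10.0 for 5 rows, 1000 rescans over 10 keys */
	double		cost = cached_rescan_total_cost(10.0, 5.0, 1000.0, 10.0, 10.0);

	printf("estimated per-rescan cost with caching: %g\n", cost);
	return 0;
}

With the example figures above the cached rescan costs roughly 0.125 versus 10.0 for an uncached rescan, which is the situation in which the patch marks the join with use_cache / paramcache.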