diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index b9d42b15a18..125b8addd9b 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -36,6 +36,7 @@ #include "commands/progress.h" #include "executor/executor.h" #include "miscadmin.h" +#include "optimizer/cost.h" #include "pgstat.h" #include "storage/bufmgr.h" #include "storage/bufpage.h" @@ -46,6 +47,8 @@ #include "utils/builtins.h" #include "utils/rel.h" +int read_stream_threshold = DEFAULT_READ_STREAM_THRESHOLD; + static void reform_and_rewrite_tuple(HeapTuple tuple, Relation OldHeap, Relation NewHeap, Datum *values, bool *isnull, RewriteState rwstate); @@ -88,6 +91,7 @@ heapam_index_fetch_begin(Relation rel) hscan->xs_base.rel = rel; hscan->xs_base.rs = NULL; + hscan->xs_base.n_heap_reads = 0; hscan->xs_cbuf = InvalidBuffer; hscan->xs_blk = InvalidBlockNumber; hscan->vmbuf = InvalidBuffer; @@ -162,7 +166,22 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan, if (scan->rs) hscan->xs_cbuf = read_stream_next_buffer(scan->rs, NULL); else - hscan->xs_cbuf = ReadBuffer(hscan->xs_base.rel, hscan->xs_blk); + { + ReadBuffersOperation operation; + operation.smgr = RelationGetSmgr(hscan->xs_base.rel); + operation.rel = hscan->xs_base.rel; + operation.persistence = hscan->xs_base.rel->rd_rel->relpersistence; + operation.forknum = MAIN_FORKNUM; + operation.strategy = NULL; + if (StartReadBuffer(&operation, + &hscan->xs_cbuf, + hscan->xs_blk, + READ_BUFFERS_SYNCHRONOUSLY)) + { + WaitReadBuffers(&operation); + scan->n_heap_reads += 1; + } + } /* * Prune page when it is pinned for the first time @@ -288,6 +307,20 @@ heap_batch_advance_pos(IndexScanDesc scan, struct BatchQueueItemPos *pos, */ batch = INDEX_SCAN_BATCH(scan, pos->batch); + /* Delay initializing stream until proceeding */ + if (!scan->xs_heapfetch->rs + && !scan->batchqueue->disabled + && !scan->xs_want_itup /* XXX prefetching disabled for IoS, for now */ + && enable_indexscan_prefetch + && scan->xs_heapfetch->n_heap_reads >= (uint64)read_stream_threshold) /* -1 -> +inf */ + { + scan->xs_heapfetch->rs = + read_stream_begin_relation(READ_STREAM_DEFAULT, NULL, + scan->heapRelation, MAIN_FORKNUM, + scan->heapRelation->rd_tableam->index_getnext_stream, + scan, 0); + } + if (ScanDirectionIsForward(direction)) { if (++pos->item <= batch->lastItem) diff --git a/src/backend/access/index/indexbatch.c b/src/backend/access/index/indexbatch.c index 29207276dca..d2fa5519378 100644 --- a/src/backend/access/index/indexbatch.c +++ b/src/backend/access/index/indexbatch.c @@ -169,17 +169,6 @@ batch_getnext(IndexScanDesc scan, ScanDirection direction) DEBUG_LOG("batch_getnext headBatch %d nextBatch %d batch %p", batchqueue->headBatch, batchqueue->nextBatch, batch); - - /* Delay initializing stream until reading from scan's second batch */ - if (priorbatch && !scan->xs_heapfetch->rs && !batchqueue->disabled && - !scan->xs_want_itup && /* XXX prefetching disabled for IoS, for - * now */ - enable_indexscan_prefetch) - scan->xs_heapfetch->rs = - read_stream_begin_relation(READ_STREAM_DEFAULT, NULL, - scan->heapRelation, MAIN_FORKNUM, - scan->heapRelation->rd_tableam->index_getnext_stream, - scan, 0); } else scan->finished = true; diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat index 8f6fa6843cb..0c0819e4d13 100644 --- a/src/backend/utils/misc/guc_parameters.dat +++ b/src/backend/utils/misc/guc_parameters.dat @@ -2322,6 +2322,16 @@ max => 'DBL_MAX', }, +{ name => 'read_stream_threshold', type => 'int', context => 'PGC_USERSET', group => 'QUERY_TUNING_COST', + short_desc => 'Minimal number of heap reads during index scan for creation of read stream', + long_desc => 'Index scan needs to read heap to check visibility of tuples and get attributes not present in index key. Read stream allows to do it asynchronously which adds extra overhead, but allows to significantly increase speed for long scans. Specify -1 to disable.', + flags => 'GUC_EXPLAIN', + variable => 'read_stream_threshold', + boot_val => 'DEFAULT_READ_STREAM_THRESHOLD', + min => '-1', + max => 'INT_MAX', +}, + { name => 'recovery_end_command', type => 'string', context => 'PGC_SIGHUP', group => 'WAL_ARCHIVE_RECOVERY', short_desc => 'Sets the shell command that will be executed once at the end of recovery.', variable => 'recoveryEndCommand', diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 1157ba9ba9d..9690d89ad8a 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -126,6 +126,7 @@ typedef struct IndexFetchTableData { Relation rel; ReadStream *rs; + uint64 n_heap_reads; /* number of heap page read from the disk */ } IndexFetchTableData; /* diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 00f4c3d0011..97150433c99 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -28,6 +28,7 @@ #define DEFAULT_CPU_OPERATOR_COST 0.0025 #define DEFAULT_PARALLEL_TUPLE_COST 0.1 #define DEFAULT_PARALLEL_SETUP_COST 1000.0 +#define DEFAULT_READ_STREAM_THRESHOLD 10 /* defaults for non-Cost parameters */ #define DEFAULT_RECURSIVE_WORKTABLE_FACTOR 10.0 @@ -72,6 +73,7 @@ extern PGDLLIMPORT bool enable_partition_pruning; extern PGDLLIMPORT bool enable_presorted_aggregate; extern PGDLLIMPORT bool enable_async_append; extern PGDLLIMPORT int constraint_exclusion; +extern PGDLLIMPORT int read_stream_threshold; extern double index_pages_fetched(double tuples_fetched, BlockNumber pages, double index_pages, PlannerInfo *root);