diff --git a/doc/src/sgml/ref/create_tablespace.sgml b/doc/src/sgml/ref/create_tablespace.sgml
index 5756c3e..cf08408 100644
--- a/doc/src/sgml/ref/create_tablespace.sgml
+++ b/doc/src/sgml/ref/create_tablespace.sgml
@@ -104,14 +104,15 @@ CREATE TABLESPACE tablespace_name
A tablespace parameter to be set or reset. Currently, the only
- available parameters are seq_page_cost> and
- random_page_cost>. Setting either value for a particular
- tablespace will override the planner's usual estimate of the cost of
- reading pages from tables in that tablespace, as established by
- the configuration parameters of the same name (see
- ,
- ). This may be useful if one
- tablespace is located on a disk which is faster or slower than the
+ available parameters are seq_page_cost>,
+ random_page_cost> and effective_io_concurrency>.
+ Setting these values for a particular tablespace will override the
+ planner's usual estimate of the cost of reading pages from tables in
+ that tablespace, and the executor's prefetching behavior, as established
+ by the configuration parameters of the same name (see ,
+ ,
+ ). This may be useful if
+ one tablespace is located on a disk which is faster or slower than the
remainder of the I/O subsystem.
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 8176b6a..fb24d74 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -232,6 +232,18 @@ static relopt_int intRelOpts[] =
},
-1, 64, MAX_KILOBYTES
},
+ {
+ {
+ "effective_io_concurrency",
+ "Number of simultaneous requests that can be handled efficiently by the disk subsystem.",
+ RELOPT_KIND_TABLESPACE
+ },
+#ifdef USE_PREFETCH
+ -1, 0, MAX_IO_CONCURRENCY /* default -1 means "not set": fall back to the GUC */
+#else
+ 0, 0, 0
+#endif
+ },
/* list terminator */
{{NULL}}
@@ -1387,7 +1399,8 @@ tablespace_reloptions(Datum reloptions, bool validate)
int numoptions;
static const relopt_parse_elt tab[] = {
{"random_page_cost", RELOPT_TYPE_REAL, offsetof(TableSpaceOpts, random_page_cost)},
- {"seq_page_cost", RELOPT_TYPE_REAL, offsetof(TableSpaceOpts, seq_page_cost)}
+ {"seq_page_cost", RELOPT_TYPE_REAL, offsetof(TableSpaceOpts, seq_page_cost)},
+ {"effective_io_concurrency", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, effective_io_concurrency)}
};
options = parseRelOptions(reloptions, validate, RELOPT_KIND_TABLESPACE,
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 4597437..7ea77c8 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -42,8 +42,10 @@
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/predicate.h"
+#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/rel.h"
+#include "utils/spccache.h"
#include "utils/snapmgr.h"
#include "utils/tqual.h"
@@ -111,7 +113,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
node->tbmres = tbmres = NULL;
#ifdef USE_PREFETCH
- if (target_prefetch_pages > 0)
+ if (node->target_prefetch_pages > 0)
{
node->prefetch_iterator = prefetch_iterator = tbm_begin_iterate(tbm);
node->prefetch_pages = 0;
@@ -188,10 +190,10 @@ BitmapHeapNext(BitmapHeapScanState *node)
* page/tuple, then to one after the second tuple is fetched, then
* it doubles as later pages are fetched.
*/
- if (node->prefetch_target >= target_prefetch_pages)
+ if (node->prefetch_target >= node->target_prefetch_pages)
/* don't increase any further */ ;
- else if (node->prefetch_target >= target_prefetch_pages / 2)
- node->prefetch_target = target_prefetch_pages;
+ else if (node->prefetch_target >= node->target_prefetch_pages / 2)
+ node->prefetch_target = node->target_prefetch_pages;
else if (node->prefetch_target > 0)
node->prefetch_target *= 2;
else
@@ -211,7 +213,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
* Try to prefetch at least a few pages even before we get to the
* second page if we don't stop reading after the first tuple.
*/
- if (node->prefetch_target < target_prefetch_pages)
+ if (node->prefetch_target < node->target_prefetch_pages)
node->prefetch_target++;
#endif /* USE_PREFETCH */
}
@@ -539,6 +541,9 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
{
BitmapHeapScanState *scanstate;
Relation currentRelation;
+#ifdef USE_PREFETCH
+ int new_io_concurrency;
+#endif
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -598,6 +603,25 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
*/
currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
+#ifdef USE_PREFETCH
+ /* If the tablespace storing the relation overrides
+ * effective_io_concurrency, compute this scan's target_prefetch_pages
+ * from that value; otherwise use the current target_prefetch_pages.
+ */
+ new_io_concurrency = get_tablespace_io_concurrency(
+ currentRelation->rd_rel->reltablespace);
+
+
+ scanstate->target_prefetch_pages = target_prefetch_pages;
+
+ if (new_io_concurrency != effective_io_concurrency)
+ {
+ double prefetch_pages;
+ if (compute_io_concurrency(new_io_concurrency, &prefetch_pages))
+ scanstate->target_prefetch_pages = rint(prefetch_pages);
+ }
+#endif
+
scanstate->ss.ss_currentRelation = currentRelation;
/*
@@ -634,3 +658,58 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
*/
return scanstate;
}
+
+/*----------
+ * Translate io_concurrency, the user-visible number of drives (spindles),
+ * into a number-of-pages-to-prefetch target stored in *target_prefetch_pages.
+ * Returns true if the computed target lies in [0, INT_MAX), else false.
+ *
+ * The expected number of prefetch pages needed to keep N drives busy is:
+ *
+ * drives | I/O requests
+ * -------+----------------
+ * 1 | 1
+ * 2 | 2/1 + 2/2 = 3
+ * 3 | 3/1 + 3/2 + 3/3 = 5 1/2
+ * 4 | 4/1 + 4/2 + 4/3 + 4/4 = 8 1/3
+ * n | n * H(n)
+ *
+ * This is called the "coupon collector problem" and H(n) is called the
+ * harmonic series. This could be approximated by n * ln(n), but for
+ * reasonable numbers of drives we might as well just compute the series.
+ *
+ * Alternatively we could set the target to the number of pages necessary
+ * so that the expected number of active spindles is some arbitrary
+ * percentage of the total. This sounds the same but is actually slightly
+ * different. The result ends up being ln(1-P)/ln((n-1)/n) where P is
+ * that desired fraction.
+ *
+ * Experimental results show that both of these formulas aren't aggressive
+ * enough, but we don't really have any better proposals.
+ *
+ * Note that if io_concurrency = 0 (disabled), the target is 0. That is a
+ * valid result (prefetching off), so the range check must accept it.
+ *----------
+ */
+bool
+compute_io_concurrency(int io_concurrency, double *target_prefetch_pages)
+{
+ double new_prefetch_pages = 0.0;
+ int i;
+
+ /*
+ * Clamp to the allowed maximum; the stored value may have been forced
+ * with a pg_tablespace UPDATE.
+ */
+ if (io_concurrency > MAX_IO_CONCURRENCY)
+ io_concurrency = MAX_IO_CONCURRENCY;
+
+ /* n * H(n); a non-positive io_concurrency leaves the sum at 0 */
+ for (i = 1; i <= io_concurrency; i++)
+ new_prefetch_pages += (double) io_concurrency / (double) i;
+
+ *target_prefetch_pages = new_prefetch_pages;
+
+ /* This range check shouldn't fail, but let's be paranoid */
+ return (new_prefetch_pages >= 0.0 && new_prefetch_pages < (double) INT_MAX);
+}
diff --git a/src/backend/utils/cache/spccache.c b/src/backend/utils/cache/spccache.c
index 1a0c884..970d66b 100644
--- a/src/backend/utils/cache/spccache.c
+++ b/src/backend/utils/cache/spccache.c
@@ -23,7 +23,9 @@
#include "commands/tablespace.h"
#include "miscadmin.h"
#include "optimizer/cost.h"
+#include "storage/bufmgr.h"
#include "utils/catcache.h"
+#include "utils/guc.h"
#include "utils/hsearch.h"
#include "utils/inval.h"
#include "utils/spccache.h"
@@ -198,3 +200,16 @@ get_tablespace_page_costs(Oid spcid,
*spc_seq_page_cost = spc->opts->seq_page_cost;
}
}
+
+/* Tablespace's effective_io_concurrency; the GUC if no options or negative. */
+int
+get_tablespace_io_concurrency(Oid spcid)
+{
+ TableSpaceCacheEntry *entry = get_tablespace(spcid);
+
+ Assert(entry != NULL);
+
+ if (entry->opts && entry->opts->effective_io_concurrency >= 0)
+ return entry->opts->effective_io_concurrency;
+ return effective_io_concurrency;
+}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 1bed525..6d7c0ae 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -37,6 +37,7 @@
#include "commands/vacuum.h"
#include "commands/variable.h"
#include "commands/trigger.h"
+#include "executor/nodeBitmapHeapscan.h"
#include "funcapi.h"
#include "libpq/auth.h"
#include "libpq/be-fsstubs.h"
@@ -438,6 +439,8 @@ int temp_file_limit = -1;
int num_temp_buffers = 1024;
+int effective_io_concurrency = 0;
+
char *cluster_name = "";
char *ConfigFileName;
char *HbaFileName;
@@ -490,7 +493,6 @@ static int wal_block_size;
static bool data_checksums;
static int wal_segment_size;
static bool integer_datetimes;
-static int effective_io_concurrency;
static bool assert_enabled;
/* should be static, but commands/variable.c needs to get at this */
@@ -2352,7 +2354,7 @@ static struct config_int ConfigureNamesInt[] =
},
&effective_io_concurrency,
#ifdef USE_PREFETCH
- 1, 0, 1000,
+ 1, 0, MAX_IO_CONCURRENCY,
#else
0, 0, 0,
#endif
@@ -9997,47 +9999,9 @@ static bool
check_effective_io_concurrency(int *newval, void **extra, GucSource source)
{
#ifdef USE_PREFETCH
- double new_prefetch_pages = 0.0;
- int i;
-
- /*----------
- * The user-visible GUC parameter is the number of drives (spindles),
- * which we need to translate to a number-of-pages-to-prefetch target.
- * The target value is stashed in *extra and then assigned to the actual
- * variable by assign_effective_io_concurrency.
- *
- * The expected number of prefetch pages needed to keep N drives busy is:
- *
- * drives | I/O requests
- * -------+----------------
- * 1 | 1
- * 2 | 2/1 + 2/2 = 3
- * 3 | 3/1 + 3/2 + 3/3 = 5 1/2
- * 4 | 4/1 + 4/2 + 4/3 + 4/4 = 8 1/3
- * n | n * H(n)
- *
- * This is called the "coupon collector problem" and H(n) is called the
- * harmonic series. This could be approximated by n * ln(n), but for
- * reasonable numbers of drives we might as well just compute the series.
- *
- * Alternatively we could set the target to the number of pages necessary
- * so that the expected number of active spindles is some arbitrary
- * percentage of the total. This sounds the same but is actually slightly
- * different. The result ends up being ln(1-P)/ln((n-1)/n) where P is
- * that desired fraction.
- *
- * Experimental results show that both of these formulas aren't aggressive
- * enough, but we don't really have any better proposals.
- *
- * Note that if *newval = 0 (disabled), we must set target = 0.
- *----------
- */
-
- for (i = 1; i <= *newval; i++)
- new_prefetch_pages += (double) *newval / (double) i;
+ double new_prefetch_pages;
- /* This range check shouldn't fail, but let's be paranoid */
- if (new_prefetch_pages >= 0.0 && new_prefetch_pages < (double) INT_MAX)
+ if (compute_io_concurrency(*newval, &new_prefetch_pages))
{
int *myextra = (int *) guc_malloc(ERROR, sizeof(int));
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 0683548..36b8a75 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -1870,7 +1870,7 @@ psql_completion(const char *text, int start, int end)
pg_strcasecmp(prev_wd, "(") == 0)
{
static const char *const list_TABLESPACEOPTIONS[] =
- {"seq_page_cost", "random_page_cost", NULL};
+ {"seq_page_cost", "random_page_cost", "effective_io_concurrency", NULL};
COMPLETE_WITH_LIST(list_TABLESPACEOPTIONS);
}
diff --git a/src/include/commands/tablespace.h b/src/include/commands/tablespace.h
index 6b928a5..be9582a 100644
--- a/src/include/commands/tablespace.h
+++ b/src/include/commands/tablespace.h
@@ -39,6 +39,7 @@ typedef struct TableSpaceOpts
int32 vl_len_; /* varlena header (do not touch directly!) */
float8 random_page_cost;
float8 seq_page_cost;
+ int effective_io_concurrency;
} TableSpaceOpts;
extern Oid CreateTableSpace(CreateTableSpaceStmt *stmt);
diff --git a/src/include/executor/nodeBitmapHeapscan.h b/src/include/executor/nodeBitmapHeapscan.h
index 3183376..698fcf5 100644
--- a/src/include/executor/nodeBitmapHeapscan.h
+++ b/src/include/executor/nodeBitmapHeapscan.h
@@ -20,5 +20,6 @@ extern BitmapHeapScanState *ExecInitBitmapHeapScan(BitmapHeapScan *node, EState
extern TupleTableSlot *ExecBitmapHeapScan(BitmapHeapScanState *node);
extern void ExecEndBitmapHeapScan(BitmapHeapScanState *node);
extern void ExecReScanBitmapHeapScan(BitmapHeapScanState *node);
+extern bool compute_io_concurrency(int io_concurrency, double *target_prefetch_pages);
#endif /* NODEBITMAPHEAPSCAN_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 541ee18..c6d48fa 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1410,15 +1410,16 @@ typedef struct BitmapIndexScanState
/* ----------------
* BitmapHeapScanState information
*
- * bitmapqualorig execution state for bitmapqualorig expressions
- * tbm bitmap obtained from child index scan(s)
- * tbmiterator iterator for scanning current pages
- * tbmres current-page data
- * exact_pages total number of exact pages retrieved
- * lossy_pages total number of lossy pages retrieved
- * prefetch_iterator iterator for prefetching ahead of current page
- * prefetch_pages # pages prefetch iterator is ahead of current
- * prefetch_target target prefetch distance
+ * bitmapqualorig execution state for bitmapqualorig expressions
+ * tbm bitmap obtained from child index scan(s)
+ * tbmiterator iterator for scanning current pages
+ * tbmres current-page data
+ * exact_pages total number of exact pages retrieved
+ * lossy_pages total number of lossy pages retrieved
+ * prefetch_iterator iterator for prefetching ahead of current page
+ * prefetch_pages # pages prefetch iterator is ahead of current
+ * prefetch_target target prefetch distance
+ * target_prefetch_pages prefetch distance limit; may be overridden by tablespace setting
* ----------------
*/
typedef struct BitmapHeapScanState
@@ -1433,6 +1434,9 @@ typedef struct BitmapHeapScanState
TBMIterator *prefetch_iterator;
int prefetch_pages;
int prefetch_target;
+#ifdef USE_PREFETCH
+ int target_prefetch_pages;
+#endif
} BitmapHeapScanState;
/* ----------------
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index dc167f9..57008fc 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -26,6 +26,9 @@
#define MAX_KILOBYTES (INT_MAX / 1024)
#endif
+/* upper limit for effective_io_concurrency */
+#define MAX_IO_CONCURRENCY 1000
+
/*
* Automatic configuration file name for ALTER SYSTEM.
* This file will be used to store values of configuration parameters
@@ -256,6 +259,8 @@ extern int temp_file_limit;
extern int num_temp_buffers;
+extern int effective_io_concurrency;
+
extern char *cluster_name;
extern char *ConfigFileName;
extern char *HbaFileName;
diff --git a/src/include/utils/spccache.h b/src/include/utils/spccache.h
index bdd1c0f..e5b9769 100644
--- a/src/include/utils/spccache.h
+++ b/src/include/utils/spccache.h
@@ -15,5 +15,6 @@
void get_tablespace_page_costs(Oid spcid, float8 *spc_random_page_cost,
float8 *spc_seq_page_cost);
+int get_tablespace_io_concurrency(Oid spcid);
#endif /* SPCCACHE_H */