diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index b4fcbaf..66ed10f 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1049,6 +1049,37 @@ include 'filename'
+
+ huge_tlb_pages (enum)
+
+ huge_tlb_pages configuration parameter
+
+
+
+ Enables/disables the use of huge TLB pages. Valid values are
+ on, off and try.
+ The default value is try; on platforms without MAP_HUGETLB only off is accepted.
+
+
+
+ With huge_tlb_pages set to on
+ mmap() will be called with MAP_HUGETLB.
+ If the call fails, the server reports a fatal error instead of falling back.
+
+
+
+ With huge_tlb_pages set to off we
+ will not use MAP_HUGETLB at all.
+
+
+
+ With huge_tlb_pages set to try
+ we will try to use MAP_HUGETLB and, if that fails, fall back to
+ mmap() without MAP_HUGETLB.
+
+
+
+
temp_buffers (integer)
diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c
index df06312..f9de239 100644
--- a/src/backend/port/sysv_shmem.c
+++ b/src/backend/port/sysv_shmem.c
@@ -27,10 +27,14 @@
#ifdef HAVE_SYS_SHM_H
#include
#endif
+#ifdef MAP_HUGETLB
+#include
+#endif
#include "miscadmin.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
+#include "utils/guc.h"
typedef key_t IpcMemoryKey; /* shared memory key passed to shmget(2) */
@@ -61,6 +65,19 @@ typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */
#define MAP_FAILED ((void *) -1)
#endif
+#ifdef MAP_HUGETLB
+# ifdef __ia64__ /* only ia64 requests a fixed mapping address (MAP_FIXED) */
+#  define PG_HUGETLB_BASE_ADDR ((void *) 0x8000000000000000UL)
+#  define PG_MAP_HUGETLB (MAP_HUGETLB | MAP_FIXED)
+# else
+#  define PG_HUGETLB_BASE_ADDR ((void *) 0x0UL)
+#  define PG_MAP_HUGETLB MAP_HUGETLB
+# endif
+#else /* no MAP_HUGETLB: keep the huge-page mmap() call compilable as a plain mapping */
+# define PG_HUGETLB_BASE_ADDR ((void *) 0x0UL)
+# define PG_MAP_HUGETLB 0
+#endif
+
unsigned long UsedShmemSegID = 0;
void *UsedShmemSegAddr = NULL;
@@ -73,7 +90,6 @@ static void IpcMemoryDelete(int status, Datum shmId);
static PGShmemHeader *PGSharedMemoryAttach(IpcMemoryKey key,
IpcMemoryId *shmid);
-
/*
* InternalIpcMemoryCreate(memKey, size)
*
@@ -342,6 +358,155 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
}
+#ifdef MAP_HUGETLB
+#define HUGE_PAGE_INFO_DIR "/sys/kernel/mm/hugepages"
+
+/*
+ * static long InternalGetFreeHugepagesCount(const char *name)
+ *
+ * Read the number of free huge pages for one page size from
+ * /sys/kernel/mm/hugepages/<name>/free_hugepages, where name is the
+ * directory entry for that size (e.g. "hugepages-2048kB").
+ * Returns the count (0 if no pages are free), or -1 if the file could not
+ * be opened, read or parsed; failures are logged at DEBUG1 when
+ * huge_tlb_pages is "try" and at WARNING otherwise.
+ */
+static long
+InternalGetFreeHugepagesCount(const char *name)
+{
+    int elevel = (huge_tlb_pages == HUGE_TLB_TRY) ? DEBUG1 : WARNING;
+    char path[1024];
+    char buff[64];
+    int pathlen;
+    int fd;
+    int save_errno;
+    ssize_t nread;
+    long result;
+    char *ptr;
+
+    /* snprintf returns the untruncated length, so >= size means truncation */
+    pathlen = snprintf(path, sizeof(path), "%s/%s/free_hugepages", HUGE_PAGE_INFO_DIR, name);
+    if (pathlen < 0 || (size_t) pathlen >= sizeof(path))
+    {
+        ereport(elevel,
+                (errmsg("Filename %s/%s/free_hugepages is too long", HUGE_PAGE_INFO_DIR, name),
+                 errcontext("while checking hugepage size")));
+        return -1;
+    }
+
+    /* open() signals failure with -1; descriptor 0 is a valid result */
+    fd = open(path, O_RDONLY);
+    if (fd < 0)
+    {
+        ereport(elevel,
+                (errmsg("Could not open file %s: %s", path, strerror(errno)),
+                 errcontext("while checking hugepage size")));
+        return -1;
+    }
+
+    /*
+     * Leave room for the terminator; the file is expected to hold a single
+     * decimal number, so a small buffer is plenty.  The byte count is kept
+     * in a signed type so that a read() error (-1) is actually detected.
+     */
+    nread = read(fd, buff, sizeof(buff) - 1);
+    save_errno = errno;     /* close() may overwrite errno */
+    close(fd);
+    if (nread <= 0)
+    {
+        ereport(elevel,
+                (errmsg("Error reading from file %s: %s", path,
+                        nread < 0 ? strerror(save_errno) : "unexpected end of file"),
+                 errcontext("while checking hugepage size")));
+        return -1;
+    }
+    buff[nread] = '\0';
+
+    /* strtol never sets ptr to NULL; no digits consumed means ptr == buff */
+    errno = 0;
+    result = strtol(buff, &ptr, 10);
+    if (ptr == buff || errno == ERANGE)
+    {
+        ereport(elevel,
+                (errmsg("Could not convert contents of file %s/%s/free_hugepages to number", HUGE_PAGE_INFO_DIR, name),
+                 errcontext("while checking hugepage size")));
+        return -1;
+    }
+
+    return result;
+}
+
+/*
+ * static long InternalGetHugepageSize(void)
+ *
+ * Attempt to get a valid hugepage size from /sys/kernel/mm/hugepages/ by
+ * reading directory contents.
+ * Will fail (return -1) if the directory could not be opened or no page
+ * size with free pages was found. On success returns the smallest hugepage
+ * size that has free pages, in bytes (a "kB" suffix is converted).
+ *
+ */
+static long
+InternalGetHugepageSize(void)
+{
+    int elevel = (huge_tlb_pages == HUGE_TLB_TRY) ? DEBUG1 : WARNING;
+    long smallest_size = -1;
+    struct dirent *ent;
+    DIR *dir = opendir(HUGE_PAGE_INFO_DIR);
+
+    if (dir == NULL)
+    {
+        ereport(elevel,
+                (errmsg("Could not open directory %s: %s", HUGE_PAGE_INFO_DIR, strerror(errno)),
+                 errcontext("while checking hugepage size")));
+        return -1;
+    }
+
+    /*
+     * Linux supports multiple hugepage sizes if the hardware
+     * supports it; for each possible size there will be a
+     * directory in /sys/kernel/mm/hugepages consisting of the
+     * string hugepages- and the size of the page, e.g. on x86_64:
+     * hugepages-2048kB
+     */
+    while ((ent = readdir(dir)) != NULL)
+    {
+        const char *digits;
+        char *ptr;
+        long size;
+
+        if (strncmp(ent->d_name, "hugepages-", 10) != 0)
+            continue;
+
+        /* strtol never sets ptr to NULL; no digits parsed means ptr == digits */
+        digits = ent->d_name + 10;
+        size = strtol(digits, &ptr, 10);
+        if (ptr == digits || size <= 0)
+            continue;
+
+        if (strcmp(ptr, "kB") == 0)
+            size *= 1024;
+
+        if ((smallest_size == -1 || size < smallest_size)
+            && InternalGetFreeHugepagesCount(ent->d_name) > 0)
+            smallest_size = size;
+    }
+
+    closedir(dir);
+
+    if (smallest_size == -1)
+    {
+        ereport(elevel,
+                (errmsg("Could not find a valid hugepage size"),
+                 errhint("This error usually means that either CONFIG_HUGETLB_PAGE "
+                         "is not in kernel or that your architecture does not "
+                         "support hugepages or you did not configure hugepages")));
+    }
+
+    return smallest_size;
+}
+#endif
+
/*
* PGSharedMemoryCreate
*
@@ -391,7 +556,17 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
*/
#ifndef EXEC_BACKEND
{
+#ifdef MAP_HUGETLB
+ long pagesize = 0;
+
+ if (huge_tlb_pages == HUGE_TLB_ON || huge_tlb_pages == HUGE_TLB_TRY)
+ pagesize = InternalGetHugepageSize();
+
+ if (pagesize <= 0)
+ pagesize = sysconf(_SC_PAGE_SIZE);
+#else
long pagesize = sysconf(_SC_PAGE_SIZE);
+#endif
/*
* Ensure request size is a multiple of pagesize.
@@ -410,8 +585,22 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
* to be false, we might need to add a run-time test here and do this
* only if the running kernel supports it.
*/
- AnonymousShmem = mmap(NULL, size, PROT_READ|PROT_WRITE, PG_MMAP_FLAGS,
- -1, 0);
+
+ if (huge_tlb_pages == HUGE_TLB_ON || huge_tlb_pages == HUGE_TLB_TRY)
+ {
+ AnonymousShmem = mmap(PG_HUGETLB_BASE_ADDR, size, PROT_READ|PROT_WRITE,
+ PG_MMAP_FLAGS|PG_MAP_HUGETLB, -1, 0);
+
+ elog(DEBUG3, "mmap() tried with MAP_HUGEPAGE: %p", AnonymousShmem);
+ }
+
+ if ((AnonymousShmem == MAP_FAILED && huge_tlb_pages == HUGE_TLB_TRY)
+ || huge_tlb_pages == HUGE_TLB_OFF)
+ {
+ AnonymousShmem = mmap(NULL, size, PROT_READ|PROT_WRITE, PG_MMAP_FLAGS,
+ -1, 0);
+ }
+
if (AnonymousShmem == MAP_FAILED)
ereport(FATAL,
(errmsg("could not map anonymous shared memory: %m"),
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 745e7be..28b6191 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -22,6 +22,7 @@
#include
#include
#include
+#include
#ifdef HAVE_SYSLOG
#include
#endif
@@ -389,6 +390,22 @@ static const struct config_enum_entry synchronous_commit_options[] = {
};
/*
+ * huge_tlb_pages may be on|off|try; the default is try where MAP_HUGETLB
+ * is defined and off (the only accepted value) where it is not.
+ * on: mmap() with MAP_HUGETLB and fail when mmap() fails
+ * off: do not try to mmap() with MAP_HUGETLB
+ * try: mmap() with MAP_HUGETLB, falling back to mmap() w/o MAP_HUGETLB
+ */
+static const struct config_enum_entry huge_tlb_options[] = {
+#ifdef MAP_HUGETLB /* "on" and "try" are offered only where MAP_HUGETLB exists */
+ {"on", HUGE_TLB_ON, false},
+ {"try", HUGE_TLB_TRY, false},
+#endif
+ {"off", HUGE_TLB_OFF, false},
+ {NULL, 0, false}
+};
+
+/*
* Options for enum values stored in other modules
*/
extern const struct config_enum_entry wal_level_options[];
@@ -447,6 +464,12 @@ int tcp_keepalives_idle;
int tcp_keepalives_interval;
int tcp_keepalives_count;
+#ifdef MAP_HUGETLB
+int huge_tlb_pages = HUGE_TLB_TRY; /* same value as the boot default in ConfigureNamesEnum */
+#else
+int huge_tlb_pages = HUGE_TLB_OFF; /* without MAP_HUGETLB "off" is the only option offered */
+#endif
+
/*
* These variables are all dummies that don't do anything, except in some
* cases provide the value for SHOW to display. The real state is elsewhere
@@ -3301,6 +3324,26 @@ static struct config_enum ConfigureNamesEnum[] =
NULL, NULL, NULL
},
+    {
+        {"huge_tlb_pages",
+#ifdef MAP_HUGETLB
+            PGC_POSTMASTER,     /* only consulted while shared memory is created */
+#else
+            PGC_INTERNAL,       /* not settable: MAP_HUGETLB is unavailable */
+#endif
+            RESOURCES_MEM,
+            gettext_noop("Enable/disable the use of the hugepages feature"),
+            NULL
+        },
+        &huge_tlb_pages,
+#ifdef MAP_HUGETLB
+        HUGE_TLB_TRY,
+#else
+        HUGE_TLB_OFF,
+#endif
+        huge_tlb_options,
+        NULL, NULL, NULL
+    },
/* End-of-list marker */
{
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index eeb9b82..e5bafec 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -113,6 +113,7 @@
#shared_buffers = 32MB # min 128kB
# (change requires restart)
+#huge_tlb_pages = try # use MAP_HUGETLB: on, off, or try (change requires restart)
#temp_buffers = 8MB # min 800kB
#max_prepared_transactions = 0 # zero disables the feature
# (change requires restart)
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index 06f797c..17f5870 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -230,6 +230,24 @@ extern int tcp_keepalives_idle;
extern int tcp_keepalives_interval;
extern int tcp_keepalives_count;
+
+/*
+ * Values of huge_tlb_pages; default is TRY if MAP_HUGETLB exists, else OFF
+ */
+typedef enum
+{
+ HUGE_TLB_OFF, /* never pass MAP_HUGETLB to mmap() */
+ HUGE_TLB_ON, /* mmap() with MAP_HUGETLB; failure is fatal */
+ HUGE_TLB_TRY /* mmap() with MAP_HUGETLB, retry without it on failure */
+} HugeTlbType;
+
+
+/*
+ * GUC variable holding one of the HugeTlbType values (stored as int)
+ */
+extern int huge_tlb_pages;
+
+
/*
* Functions exported by guc.c
*/