diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 189219ad88..239181d3bb 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8463,6 +8463,62 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
     </sect2>
    </sect1>
 
+   <sect1 id="runtime-config-encryption">
+    <title>Encryption</title>
+
+     <variablelist>
+
+     <varlistentry id="guc-encryption-key-cmd" xreflabel="encryption_key_command">
+      <term><varname>encryption_key_command</varname> (<type>string</type>)
+      <indexterm>
+       <primary>encryption</primary>
+      </indexterm>
+      <indexterm>
+       <primary>key command</primary>
+      </indexterm>
+      <indexterm>
+       <primary><varname>encryption_key_command</varname> configuration
+       parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        This setting specifies the path to an executable file that returns
+        either <literal>encryption_key=</literal> followed by the key,
+        or <literal>encryption_password=</literal> followed by the password.
+       </para>
+
+       <para>
+        The encryption key is expected in hexadecimal format, two characters
+        (hexadecimal digits) per
+        byte. Since <productname>PostgreSQL</productname> currently uses a
+        key of length 32 bytes (256 bits), the expected length of the key string
+        is 64 characters. For example:
+<computeroutput>
+encryption_key=882fb7c12e80280fd664c69d2d636913e86c381ba487c82f77653c0fac8ffc69
+</computeroutput>
+       </para>
+
+       <para>
+        An example of encryption password:
+<computeroutput>
+encryption_password=mysecurepwd
+</computeroutput>
+        The minimum password length is 8 characters and the maximum length
+        is 16 characters.
+       </para>
+
+       <para>
+        <varname>encryption_key_command</varname> can only be set in
+        the <filename>postgresql.conf</filename> file or on the server command
+        line.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     </variablelist>
+   </sect1>
+
    <sect1 id="runtime-config-locks">
     <title>Lock Management</title>
 
@@ -8998,6 +9054,20 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-data-encryption" xreflabel="data_encryption">
+      <term><varname>data_encryption</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>data_encryption</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Reports whether data encryption is enabled for this cluster.
+        See <xref linkend="encryption"/> for more information.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-data-directory-mode" xreflabel="data_directory_mode">
       <term><varname>data_directory_mode</varname> (<type>integer</type>)
       <indexterm>
@@ -9812,6 +9882,10 @@ LOG:  CleanUpLock: deleting: lock(0xb7acd844) id(24688,24696,0,0,0,1)
         <entry><literal>listen_addresses = '*'</literal></entry>
        </row>
        <row>
+        <entry><option>-K <replaceable>x</replaceable></option></entry>
+        <entry><literal>encryption_key_command = <replaceable>x</replaceable></literal></entry>
+       </row>
+       <row>
         <entry><option>-k <replaceable>x</replaceable></option></entry>
         <entry><literal>unix_socket_directories = <replaceable>x</replaceable></literal></entry>
        </row>
diff --git a/doc/src/sgml/encryption.sgml b/doc/src/sgml/encryption.sgml
new file mode 100644
index 0000000000..20de042d1a
--- /dev/null
+++ b/doc/src/sgml/encryption.sgml
@@ -0,0 +1,74 @@
+<!-- doc/src/sgml/encryption.sgml -->
+
+<chapter id="encryption">
+ <title>Transparent Cluster Encryption</title>
+
+ <para>
+  Cluster encryption can be used if the <acronym>DBA</acronym> cannot or does
+  not want to rely on the filesystem in terms of data confidentiality. If this
+  feature is enabled, <productname>PostgreSQL</productname> encrypts data
+  (both relations and write-ahead log) when writing it to disk, and decrypts
+  it when reading. The encryption is transparent, so applications see no
+  difference between an encrypted and an unencrypted cluster.
+ </para>
+
+ <para>
+  To create an encrypted cluster, use the <option>-K</option> option to pass
+  the <xref linkend="app-initdb"/> utility the path to the command that
+  retrieves the encryption key or password. For example:
+<screen>
+<prompt>$</prompt> <userinput>initdb -D /usr/local/pgsql/data -K /usr/local/pgsql/fetch_key_cmd</userinput>
+</screen>
+  Here <filename>/usr/local/pgsql/fetch_key_cmd</filename> is an executable
+  file that returns either the encryption key or the encryption password with
+  the appropriate prefix; see <xref linkend="guc-encryption-key-cmd"/> for more
+  information.
+ </para>
+
+ <note>
+  <para>
+   Internally, <productname>PostgreSQL</productname> always uses the
+   encryption key. If the encryption key command returns a password, the
+   encryption key is derived from it during startup. If the user needs the
+   key, they can always use the <xref linkend="app-pgkeysetup"/> utility to
+   derive it themselves.
+  </para>
+ </note>
+
+ <para>
+  On completion, <command>initdb</command> stores the encryption key command
+  in <filename>postgresql.conf</filename>. Thus the user can control the
+  cluster using <xref linkend="app-pg-ctl"/> without passing it the encryption
+  key command each time.
+ </para>
+
+ <para>
+  If encryption is enabled, <xref linkend="guc-full-page-writes"/> must be
+  turned on, otherwise the server refuses to start. This is because
+  encryption introduces dependencies between data within a page, and thus a
+  server crash during a disk write can result in more serious damage to the
+  page than it would without encryption. The whole page needs to be retrieved
+  from WAL in such a case to ensure reliable recovery.
+ </para>
+
+ <para>
+  Once the <productname>PostgreSQL</productname> server is running, client
+  applications should recognize no difference from an unencrypted cluster,
+  except that the <xref linkend="guc-data-encryption"/> configuration variable
+  is set.
+ </para>
+
+ <para>
+  Unlike <xref linkend="app-pg-ctl"/>, some of the server applications (for
+  example <xref linkend="pgwaldump"/>) do need the <option>-K</option> option
+  because they are not able to process the <filename>postgresql.conf</filename> file.
+ </para>
+
+ <para>
+  Since WAL is encrypted, any replication solution based on log shipping
+  (<xref linkend="warm-standby"/>) assumes that all standby servers are
+  encrypted using the same key as their primary server. On the other hand,
+  <xref linkend="logical-replication"/> allows replication between encrypted
+  and unencrypted clusters, or between clusters encrypted with different keys.
+ </para>
+</chapter>
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index 7e37042a55..0e22070504 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -48,6 +48,7 @@
 <!ENTITY wal           SYSTEM "wal.sgml">
 <!ENTITY logical-replication    SYSTEM "logical-replication.sgml">
 <!ENTITY jit    SYSTEM "jit.sgml">
+<!ENTITY encryption    SYSTEM "encryption.sgml">
 
 <!-- programmer's guide -->
 <!ENTITY bgworker   SYSTEM "bgworker.sgml">
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index 449386243b..a2daf33e6f 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -245,8 +245,9 @@ su - postgres
     <listitem>
      <para>
       You need <productname>OpenSSL</productname>, if you want to support
-      encrypted client connections.  <productname>OpenSSL</productname> is
-      also required for random number generation on platforms that do not
+      on-disk data encryption or encrypted client
+      connections.  <productname>OpenSSL</productname> is also required for
+      random number generation on platforms that do not
       have <filename>/dev/urandom</filename> (except Windows).  The minimum
       version required is 0.9.8.
      </para>
@@ -835,10 +836,11 @@ su - postgres
        </term>
        <listitem>
         <para>
-         Build with support for <acronym>SSL</acronym> (encrypted)
-         connections. This requires the <productname>OpenSSL</productname>
-         package to be installed.  <filename>configure</filename> will check
-         for the required header files and libraries to make sure that
+         Build with support for on-disk data encryption
+         or <acronym>SSL</acronym> (encrypted) connections. This requires
+         the <productname>OpenSSL</productname> package to be
+         installed.  <filename>configure</filename> will check for the
+         required header files and libraries to make sure that
          your <productname>OpenSSL</productname> installation is sufficient
          before proceeding.
         </para>
diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml
index 3e115f1c76..b0d35600b2 100644
--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -163,6 +163,7 @@
   &wal;
   &logical-replication;
   &jit;
+  &encryption;
   &regress;
 
  </part>
diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml
index 8d91f3529e..4799fe61c7 100644
--- a/doc/src/sgml/ref/allfiles.sgml
+++ b/doc/src/sgml/ref/allfiles.sgml
@@ -206,6 +206,7 @@ Complete list of usable sgml source files in this directory.
 <!ENTITY pgDump             SYSTEM "pg_dump.sgml">
 <!ENTITY pgDumpall          SYSTEM "pg_dumpall.sgml">
 <!ENTITY pgIsready          SYSTEM "pg_isready.sgml">
+<!ENTITY pgKeysetup         SYSTEM "pg_keysetup.sgml">
 <!ENTITY pgReceivewal       SYSTEM "pg_receivewal.sgml">
 <!ENTITY pgRecvlogical      SYSTEM "pg_recvlogical.sgml">
 <!ENTITY pgResetwal         SYSTEM "pg_resetwal.sgml">
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index 7fc3152c6d..8b9b7ab25b 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -209,6 +209,18 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry id="app-initdb-data-encr-cmd" xreflabel="data encryption">
+      <term><option>-K</option></term>
+      <term><option>--encryption-key-command=<replaceable class="parameter">command</replaceable></option></term>
+      <listitem>
+       <para>
+        Encrypt the cluster data using a key or password retrieved from the
+        command specified here. This option can only be set during
+        initialization, and cannot be changed later.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="app-initdb-data-checksums" xreflabel="data checksums">
       <term><option>-k</option></term>
       <term><option>--data-checksums</option></term>
diff --git a/doc/src/sgml/ref/pg_checksums.sgml b/doc/src/sgml/ref/pg_checksums.sgml
index a0ffeb0ab0..bc42bb6168 100644
--- a/doc/src/sgml/ref/pg_checksums.sgml
+++ b/doc/src/sgml/ref/pg_checksums.sgml
@@ -101,6 +101,17 @@ PostgreSQL documentation
      </varlistentry>
 
      <varlistentry>
+      <term><option>-K</option></term>
+      <term><option>--encryption-key-command=<replaceable class="parameter">command</replaceable></option></term>
+      <listitem>
+       <para>
+        If the cluster is encrypted, use the key or password retrieved from
+        the command specified here to decrypt and encrypt the data.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
       <term><option>-N</option></term>
       <term><option>--no-sync</option></term>
       <listitem>
diff --git a/doc/src/sgml/ref/pg_keysetup.sgml b/doc/src/sgml/ref/pg_keysetup.sgml
new file mode 100644
index 0000000000..89428fe6bd
--- /dev/null
+++ b/doc/src/sgml/ref/pg_keysetup.sgml
@@ -0,0 +1,74 @@
+<!--
+doc/src/sgml/ref/pg_keysetup.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="app-pgkeysetup">
+ <indexterm zone="app-pgkeysetup">
+  <primary>pg_keysetup</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle><application>pg_keysetup</application></refentrytitle>
+  <manvolnum>1</manvolnum>
+  <refmiscinfo>Application</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>pg_keysetup</refname>
+  <refpurpose>derive encryption key from encryption password</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+  <cmdsynopsis>
+   <command>pg_keysetup</command>
+   <group choice="plain">
+    <group choice="opt">
+     <arg choice="plain"><option>-D</option></arg>
+    </group>
+    <replaceable class="parameter"> datadir</replaceable>
+   </group>
+  </cmdsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+  <para>
+   <application>pg_keysetup</application> reads an encryption password from the
+   standard input, runs a <firstterm>key derivation function</firstterm> on it
+   and writes the encryption key to the standard output. This is useful if
+   <xref linkend="guc-encryption-key-cmd"/> returns an encryption password, but
+   the user needs the encryption key. The typical use case is that the user needs
+   to run some server application (for example <xref linkend="pgwaldump"/>) which
+   only accepts the key.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Options</title>
+
+   <para>
+    The application currently accepts only the following command-line option:
+
+    <variablelist>
+     <varlistentry>
+      <term><option>-D <replaceable>directory</replaceable></option></term>
+      <listitem>
+       <para>
+        Specifies the directory where the database cluster is stored.
+       </para>
+      </listitem>
+     </varlistentry>
+    </variablelist>
+
+   </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Notes</title>
+  <para>
+   When run against different data directories, the function is likely to turn
+   the same password into different keys.
+  </para>
+ </refsect1>
+</refentry>
diff --git a/doc/src/sgml/ref/pg_resetwal.sgml b/doc/src/sgml/ref/pg_resetwal.sgml
index 8a9e22d050..dbdc234666 100644
--- a/doc/src/sgml/ref/pg_resetwal.sgml
+++ b/doc/src/sgml/ref/pg_resetwal.sgml
@@ -100,6 +100,17 @@ PostgreSQL documentation
    </varlistentry>
 
    <varlistentry>
+    <term><option>-K</option></term>
+    <term><option>--encryption-key-command=<replaceable class="parameter">command</replaceable></option></term>
+    <listitem>
+     <para>
+       If the cluster is encrypted, use the key or password retrieved from
+       the command specified here to decrypt and encrypt the data.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
     <term><option>-n</option></term>
     <term><option>--dry-run</option></term>
     <listitem>
diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index 4d91eeb0ff..79ed2347c0 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -140,6 +140,17 @@ PostgreSQL documentation
      </varlistentry>
 
      <varlistentry>
+      <term><option>-K</option></term>
+      <term><option>--encryption-key-command=<replaceable class="parameter">command</replaceable></option></term>
+      <listitem>
+       <para>
+        If the cluster is encrypted, use the key or password retrieved from
+        the command specified here to decrypt and encrypt the data.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
       <term><option>--source-pgdata=<replaceable class="parameter">directory</replaceable></option></term>
       <listitem>
        <para>
diff --git a/doc/src/sgml/ref/pg_waldump.sgml b/doc/src/sgml/ref/pg_waldump.sgml
index 329c10e430..d012b303fc 100644
--- a/doc/src/sgml/ref/pg_waldump.sgml
+++ b/doc/src/sgml/ref/pg_waldump.sgml
@@ -101,6 +101,17 @@ PostgreSQL documentation
      </varlistentry>
 
      <varlistentry>
+      <term><option>-K</option></term>
+      <term><option>--encryption-key-command=<replaceable class="parameter">command</replaceable></option></term>
+      <listitem>
+       <para>
+         If the cluster is encrypted, use the key or password retrieved from
+         the command specified here to decrypt and encrypt the data.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
       <term><option>-n <replaceable>limit</replaceable></option></term>
       <term><option>--limit=<replaceable>limit</replaceable></option></term>
       <listitem>
diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml
index cef09dd38b..7db13f716a 100644
--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -279,6 +279,7 @@
    &pgChecksums;
    &pgControldata;
    &pgCtl;
+   &pgKeysetup;
    &pgResetwal;
    &pgRewind;
    &pgtestfsync;
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 6fabc5e27f..7918f0b0cf 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -55,6 +55,7 @@
 #include "replication/walreceiver.h"
 #include "replication/walsender.h"
 #include "storage/bufmgr.h"
+#include "storage/encryption.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/large_object.h"
@@ -77,6 +78,7 @@
 #include "pg_trace.h"
 
 extern uint32 bootstrap_data_checksum_version;
+extern char *bootstrap_encryption_sample;
 
 /* Unsupported old recovery command file names (relative to $PGDATA) */
 #define RECOVERY_COMMAND_FILE	"recovery.conf"
@@ -2488,7 +2490,61 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
 
 			/* OK to write the page(s) */
 			from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
-			startoffset += XLogWritePages(from, npages, startoffset);
+			if (data_encrypted)
+			{
+				int			i,
+							nencrypted;
+				char	   *to;
+				uint32		encr_offset;
+
+				/*
+				 * Encrypt and write multiple pages at a time, in order to
+				 * reduce the number of syscalls.
+				 */
+				nencrypted = 0;
+				to = encrypt_buf_xlog;
+				encr_offset = startoffset;
+				for (i = 1; i <= npages; i++)
+				{
+					char		tweak[TWEAK_SIZE];
+					Size		nbytes;
+
+					XLogEncryptionTweak(tweak, ThisTimeLineID, openLogSegNo, encr_offset);
+
+					/*
+					 * We should not encrypt the unused space, in order to
+					 * avoid "reused key attack".
+					 */
+					if (i == npages && ispartialpage)
+						nbytes = WriteRqst.Write % XLOG_BLCKSZ;
+					else
+						nbytes = XLOG_BLCKSZ;
+
+					encrypt_block(from, to, nbytes, tweak, true);
+					nencrypted++;
+					from += XLOG_BLCKSZ;
+					to += XLOG_BLCKSZ;
+					encr_offset += XLOG_BLCKSZ;
+
+					/*
+					 * Write the encrypted data if the encryption buffer is
+					 * full or if the last page has been encrypted.
+					 */
+					if (nencrypted >= XLOG_ENCRYPT_BUF_PAGES || i >= npages)
+					{
+						startoffset += XLogWritePages(encrypt_buf_xlog,
+													  nencrypted,
+													  startoffset);
+
+						/* Prepare for the next round of page encryptions. */
+						nencrypted = 0;
+						to = encrypt_buf_xlog;
+						encr_offset = startoffset;
+					}
+				}
+			}
+			else
+				startoffset += XLogWritePages(from, npages, startoffset);
 
 			npages = 0;
 
@@ -3478,6 +3534,24 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
 			}
 			pgstat_report_wait_end();
 		}
+
+		/*
+		 * Since timeline is being changed and since encryption tweak contains
+		 * the timeline, we need to decrypt the buffer and encrypt it with the
+		 * new tweak. Do not encrypt the unused space, in order to avoid
+		 * "reused key attack".
+		 */
+		if (data_encrypted && nread > 0)
+		{
+			char		tweak[TWEAK_SIZE];
+
+			XLogEncryptionTweak(tweak, srcTLI, srcsegno, nbytes);
+			decrypt_block(buffer.data, buffer.data, nread, tweak, true);
+
+			XLogEncryptionTweak(tweak, ThisTimeLineID, destsegno, nbytes);
+			encrypt_block(buffer.data, buffer.data, nread, tweak, true);
+		}
+
 		errno = 0;
 		pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_WRITE);
 		if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
@@ -4790,6 +4864,54 @@ ReadControlFile(void)
 		ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 						errmsg("\"max_wal_size\" must be at least twice \"wal_segment_size\"")));
 
+	/*
+	 * Initialize encryption, but not if the current backend has already done
+	 * that.
+	 */
+	if (ControlFile->data_cipher > PG_CIPHER_NONE && !data_encrypted)
+	{
+		char		sample[ENCRYPTION_SAMPLE_SIZE];
+
+		setup_encryption(false, NULL);
+		SetConfigOption("data_encryption", "true", PGC_INTERNAL,
+						PGC_S_OVERRIDE);
+
+		memset(sample, 0, ENCRYPTION_SAMPLE_SIZE);
+		sample_encryption(sample);
+
+		if (memcmp(ControlFile->encryption_verification, sample, ENCRYPTION_SAMPLE_SIZE))
+			ereport(FATAL,
+					(errmsg("invalid encryption key"),
+					 errdetail("The passed encryption key does not match"
+							   " database encryption key.")));
+
+		/*
+		 * full_page_writes must be set because torn page write of an
+		 * encrypted page implies that decryption of the page will produce
+		 * garbage. This damage can affect even those parts of the page that
+		 * haven't been modified by any access method. And since no access
+		 * method modified those parts, there might be no XLOG records to
+		 * repair them during crash recovery. So full page image is the only
+		 * way to fix such a page.
+		 *
+		 * XXX It would be nice to have guc.c check so that we don't have to
+		 * copy and paste the error message. However it's unclear how to
+		 * ensure that either ERROR is raised or nothing happens at all. It
+		 * seems that set_config_option() can change
+		 * config_generic.reset_source if the check succeeded, but that's too
+		 * invasive.
+		 */
+		if (!fullPageWrites)
+			ereport(FATAL,
+					(errmsg("invalid value for parameter \"full_page_writes\": %d",
+							fullPageWrites),
+					 errdetail("Cannot disable parameter when the cluster is encrypted.")));
+	}
+
+	/*
+	 * This calculation relies on data_encryption (in particular the header
+	 * sizes do), so we could not do it earlier.
+	 */
 	UsableBytesInSegment =
 		(wal_segment_size / XLOG_BLCKSZ * UsableBytesInPage) -
 		(SizeOfXLogLongPHD - SizeOfXLogShortPHD);
@@ -5220,6 +5342,14 @@ BootStrapXLOG(void)
 	use_existent = false;
 	openLogFile = XLogFileInit(1, &use_existent, false);
 
+	if (data_encrypted)
+	{
+		char		tweak[TWEAK_SIZE];
+
+		XLogEncryptionTweak(tweak, ThisTimeLineID, 1, 0);
+		encrypt_block((char *) page, (char *) page, XLOG_BLCKSZ, tweak, true);
+	}
+
 	/* Write the first page with the initial record */
 	errno = 0;
 	pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_WRITE);
@@ -5270,6 +5400,22 @@ BootStrapXLOG(void)
 	ControlFile->wal_log_hints = wal_log_hints;
 	ControlFile->track_commit_timestamp = track_commit_timestamp;
 	ControlFile->data_checksum_version = bootstrap_data_checksum_version;
+	ControlFile->data_cipher = PG_CIPHER_AES_BLOCK_CBC_256_STREAM_CTR_256;
+
+	if (data_encrypted)
+	{
+		char	   *sample;
+
+		sample = palloc0(ENCRYPTION_SAMPLE_SIZE);
+		sample_encryption(sample);
+
+		memcpy(ControlFile->encryption_verification, sample,
+			   ENCRYPTION_SAMPLE_SIZE);
+		pfree(sample);
+	}
+	else
+		memset(ControlFile->encryption_verification, 0,
+			   ENCRYPTION_SAMPLE_SIZE);
 
 	/* some additional ControlFile fields are set in WriteControlFile() */
 
@@ -11641,6 +11787,14 @@ retry:
 	Assert(targetPageOff == readOff);
 	Assert(reqLen <= readLen);
 
+	if (data_encrypted)
+	{
+		char		tweak[TWEAK_SIZE];
+
+		XLogEncryptionTweak(tweak, curFileTLI, readSegNo, readOff);
+		decrypt_block(readBuf, readBuf, XLOG_BLCKSZ, tweak, true);
+	}
+
 	*readTLI = curFileTLI;
 
 	/*
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index 1c76dcfa0d..6fdba0eff4 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -28,6 +28,7 @@
 #include "miscadmin.h"
 #include "replication/origin.h"
 #include "storage/bufmgr.h"
+#include "storage/encryption.h"
 #include "storage/proc.h"
 #include "utils/memutils.h"
 #include "pg_trace.h"
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index 10a663bae6..0ebed22895 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -25,6 +25,7 @@
 #include "access/xlogutils.h"
 #include "miscadmin.h"
 #include "pgstat.h"
+#include "storage/encryption.h"
 #include "storage/smgr.h"
 #include "utils/guc.h"
 #include "utils/hsearch.h"
@@ -653,12 +654,13 @@ XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
  * frontend).  Probably these should be merged at some point.
  */
 static void
-XLogRead(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr,
-		 Size count)
+XLogRead(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr, Size count)
 {
-	char	   *p;
 	XLogRecPtr	recptr;
 	Size		nbytes;
+	char	   *decrypt_p;
+	uint32		decryptOff;
+	char	   *p;
 
 	/* state maintained across calls */
 	static int	sendFile = -1;
@@ -668,17 +670,42 @@ XLogRead(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr,
 
 	Assert(segsize == wal_segment_size);
 
+	/*
+	 * XXX Currently the function is only called with startptr at page
+	 * boundary. If it should change, encryption needs to reflect this fact,
+	 * i.e. read data from the beginning of the page. Actually this is a
+	 * reason to adjust and use walsender.c:XLogRead().
+	 */
+	Assert(startptr % XLOG_BLCKSZ == 0);
+
 	p = buf;
 	recptr = startptr;
 	nbytes = count;
 
+	decrypt_p = p;
+	decryptOff = XLogSegmentOffset(recptr, segsize);
+
 	while (nbytes > 0)
 	{
 		uint32		startoff;
 		int			segbytes;
 		int			readbytes;
 
-		startoff = XLogSegmentOffset(recptr, segsize);
+		if (recptr == startptr)
+			startoff = decryptOff;
+		else
+		{
+			startoff = XLogSegmentOffset(recptr, segsize);
+			if (startoff == 0)
+			{
+				/*
+				 * If segment boundary was reached, decryptOff should have
+				 * caught up, so we can (and should) sync it with startoff.
+				 */
+				Assert(decryptOff == segsize);
+				decryptOff = startoff;
+			}
+		}
 
 		/* Do we need to switch to a different xlog segment? */
 		if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo, segsize) ||
@@ -758,7 +785,30 @@ XLogRead(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr,
 		sendOff += readbytes;
 		nbytes -= readbytes;
 		p += readbytes;
+
+		/* Decrypt completed blocks */
+		if (data_encrypted)
+		{
+			while (decrypt_p + XLOG_BLCKSZ <= p)
+			{
+				char		tweak[TWEAK_SIZE];
+
+				XLogEncryptionTweak(tweak, tli, sendSegNo, decryptOff);
+				decrypt_block(decrypt_p, decrypt_p, XLOG_BLCKSZ, tweak,
+							  true);
+
+				decrypt_p += XLOG_BLCKSZ;
+				decryptOff += XLOG_BLCKSZ;
+			}
+		}
 	}
+
+	/*
+	 * XXX Currently the function is only called with count==XLOG_BLCKSZ. If
+	 * that should change, the rest of the data needs to be decrypted. Another
+	 * reason to reuse  walsender.c:XLogRead().
+	 */
+	Assert(decrypt_p == p);
 }
 
 /*
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index d8776e192e..e68b8019f0 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -26,6 +26,7 @@
 #include "bootstrap/bootstrap.h"
 #include "catalog/index.h"
 #include "catalog/pg_collation.h"
+#include "catalog/pg_control.h"
 #include "catalog/pg_type.h"
 #include "common/link-canary.h"
 #include "libpq/pqsignal.h"
@@ -40,6 +41,7 @@
 #include "storage/bufmgr.h"
 #include "storage/bufpage.h"
 #include "storage/condition_variable.h"
+#include "storage/encryption.h"
 #include "storage/ipc.h"
 #include "storage/proc.h"
 #include "tcop/tcopprot.h"
@@ -52,7 +54,6 @@
 
 uint32		bootstrap_data_checksum_version = 0;	/* No checksum */
 
-
 #define ALLOC(t, c) \
 	((t *) MemoryContextAllocZero(TopMemoryContext, (unsigned)(c) * sizeof(t)))
 
@@ -226,7 +227,7 @@ AuxiliaryProcessMain(int argc, char *argv[])
 	/* If no -x argument, we are a CheckerProcess */
 	MyAuxProcType = CheckerProcess;
 
-	while ((flag = getopt(argc, argv, "B:c:d:D:Fkr:x:X:-:")) != -1)
+	while ((flag = getopt(argc, argv, "B:c:d:D:FkK:r:x:X:-:")) != -1)
 	{
 		switch (flag)
 		{
@@ -252,6 +253,25 @@ AuxiliaryProcessMain(int argc, char *argv[])
 			case 'F':
 				SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
 				break;
+#ifdef	USE_ENCRYPTION
+			case 'K':
+				encryption_key_command = strdup(optarg);
+
+				/*
+				 * When auxiliary process (typically bootstrap) starts, the
+				 * control file might not exist yet. In this case we also use
+				 * encryption_key_command to indicate that the encryption is
+				 * enabled.
+				 *
+				 * Postmaster should not set this variable. Instead, it just
+				 * sets data_encrypted according to the control file and child
+				 * processes inherit that.
+				 */
+				Assert(!IsUnderPostmaster);
+				data_encrypted = true;
+
+				break;
+#endif							/* USE_ENCRYPTION */
 			case 'k':
 				bootstrap_data_checksum_version = PG_DATA_CHECKSUM_VERSION;
 				break;
@@ -373,6 +393,20 @@ AuxiliaryProcessMain(int argc, char *argv[])
 	if (!IsUnderPostmaster)
 		InitializeMaxBackends();
 
+	/*
+	 * If data_encryption is set because of command line argument, do the
+	 * setup now. (If set by postmaster, postmaster should have performed the
+	 * setup.)
+	 *
+	 * This should only be useful for the bootstrap process. Anyone else
+	 * initializes the encryption via ReadControlFile().
+	 */
+	if (data_encrypted && MyAuxProcType == BootstrapProcess)
+	{
+		Assert(!IsUnderPostmaster);
+		setup_encryption(true, NULL);
+	}
+
 	BaseInit();
 
 	/*
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 5015e5b3b6..4589a2c2f4 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -627,7 +627,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 			 *
 			 * We don't need to copy subdirectories
 			 */
-			copydir(srcpath, dstpath, false);
+			{
+				RelFileNode fromNode = {srctablespace, src_dboid, InvalidOid};
+				RelFileNode toNode = {dsttablespace, dboid, InvalidOid};
+
+				copydir(srcpath, dstpath, &fromNode, &toNode);
+			}
 
 			/* Record the filesystem change in XLOG */
 			{
@@ -1256,7 +1261,12 @@ movedb(const char *dbname, const char *tblspcname)
 		/*
 		 * Copy files from the old tablespace to the new one
 		 */
-		copydir(src_dbpath, dst_dbpath, false);
+		{
+			RelFileNode fromNode = {src_tblspcoid, db_id, InvalidOid};
+			RelFileNode toNode = {dst_tblspcoid, db_id, InvalidOid};
+
+			copydir(src_dbpath, dst_dbpath, &fromNode, &toNode);
+		}
 
 		/*
 		 * Record the filesystem change in XLOG
@@ -2119,7 +2129,12 @@ dbase_redo(XLogReaderState *record)
 		 *
 		 * We don't need to copy subdirectories
 		 */
-		copydir(src_path, dst_path, false);
+		{
+			RelFileNode fromNode = {xlrec->src_tablespace_id, xlrec->src_db_id, InvalidOid};
+			RelFileNode toNode = {xlrec->tablespace_id, xlrec->db_id, InvalidOid};
+
+			copydir(src_path, dst_path, &fromNode, &toNode);
+		}
 	}
 	else if (info == XLOG_DBASE_DROP)
 	{
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 51ff2cfa65..baedb4e95e 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3878,6 +3878,15 @@ pgstat_get_wait_io(WaitEventIO w)
 		case WAIT_EVENT_DSM_FILL_ZERO_WRITE:
 			event_name = "DSMFillZeroWrite";
 			break;
+		case WAIT_EVENT_KDF_FILE_READ:
+			event_name = "KDFFileRead";
+			break;
+		case WAIT_EVENT_KDF_FILE_SYNC:
+			event_name = "KDFFileSync";
+			break;
+		case WAIT_EVENT_KDF_FILE_WRITE:
+			event_name = "KDFFileWrite";
+			break;
 		case WAIT_EVENT_LOCK_FILE_ADDTODATADIR_READ:
 			event_name = "LockFileAddToDataDirRead";
 			break;
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 60d29a2089..fff0c67411 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -117,6 +117,7 @@
 #include "postmaster/syslogger.h"
 #include "replication/logicallauncher.h"
 #include "replication/walsender.h"
+#include "storage/encryption.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/pg_shmem.h"
@@ -669,7 +670,7 @@ PostmasterMain(int argc, char *argv[])
 	 * tcop/postgres.c (the option sets should not conflict) and with the
 	 * common help() function in main/main.c.
 	 */
-	while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:")) != -1)
+	while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijK:k:lN:nOo:Pp:r:S:sTt:W:-:")) != -1)
 	{
 		switch (opt)
 		{
@@ -727,6 +728,12 @@ PostmasterMain(int argc, char *argv[])
 				/* only used by interactive backend */
 				break;
 
+#ifdef	USE_ENCRYPTION
+			case 'K':
+				encryption_key_command = strdup(optarg);
+				break;
+#endif							/* USE_ENCRYPTION */
+
 			case 'k':
 				SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
 				break;
@@ -1234,6 +1241,11 @@ PostmasterMain(int argc, char *argv[])
 
 	/*
 	 * Set up shared memory and semaphores.
+	 *
+	 * This includes call of setup_encryption() as soon as we realize that
+	 * "data_encrypted" field of the control file is set. The encryption must
+	 * be initialized at the point so that "encryption_verification" field of
+	 * the control file can be checked.
 	 */
 	reset_shared(PostPortNumber);
 
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 36dcb28754..ba047f43b1 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -1109,7 +1109,7 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
 		/* Exclude all forks for unlogged tables except the init fork */
 		if (isDbDir &&
 			parse_filename_for_nontemp_relation(de->d_name, &relOidChars,
-												&relForkNum))
+												&relForkNum, NULL))
 		{
 			/* Never exclude init forks */
 			if (relForkNum != INIT_FORKNUM)
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 09c8b5a5b3..59743976c0 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -77,6 +77,7 @@
 #include "replication/walsender.h"
 #include "replication/walsender_private.h"
 #include "storage/condition_variable.h"
+#include "storage/encryption.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/pmsignal.h"
@@ -256,7 +257,9 @@ static void LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time);
 static TimeOffset LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now);
 static bool TransactionIdInRecentPast(TransactionId xid, uint32 epoch);
 
-static void XLogRead(char *buf, XLogRecPtr startptr, Size count);
+static int	XLogReadBuffer(char *buf, int nbytes, int startoff);
+static void XLogRead(char *buf, XLogRecPtr startptr, Size count,
+		 bool decrypt);
 
 
 /* Initialize walsender process before entering the main command loop */
@@ -787,7 +790,7 @@ logical_read_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int req
 		count = flushptr - targetPagePtr;	/* part of the page available */
 
 	/* now actually read the data, we know it's there */
-	XLogRead(cur_page, targetPagePtr, XLOG_BLCKSZ);
+	XLogRead(cur_page, targetPagePtr, XLOG_BLCKSZ, true);
 
 	return count;
 }
@@ -2346,19 +2349,53 @@ WalSndKill(int code, Datum arg)
 	SpinLockRelease(&walsnd->mutex);
 }
 
+/* Read up to 'nbytes' bytes of sendFile at 'startoff' into 'buf'; returns the count read (> 0). */
+static int
+XLogReadBuffer(char *buf, int nbytes, int startoff)
+{
+	int			readbytes;
+
+	if (sendOff != startoff)	/* need to seek in the file? */
+	{
+		if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not seek in log segment %s to offset %u: %m",
+							XLogFileNameP(curFileTimeLine, sendSegNo), startoff)));
+		sendOff = startoff;
+	}
+
+	pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
+	readbytes = read(sendFile, buf, nbytes);
+	pgstat_report_wait_end();
+	if (readbytes < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not read from log segment %s, offset %u, length %lu: %m",
+						XLogFileNameP(curFileTimeLine, sendSegNo), sendOff, (unsigned long) nbytes)));
+	if (readbytes == 0)			/* EOF: read() leaves errno unset, so %m would mislead */
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_CORRUPTED),
+				 errmsg("could not read from log segment %s, offset %u: read %d of %zu",
+						XLogFileNameP(curFileTimeLine, sendSegNo), sendOff, readbytes, (Size) nbytes)));
+	return readbytes;
+}
+
 /*
- * Read 'count' bytes from WAL into 'buf', starting at location 'startptr'
+ * Read 'count' bytes from WAL into 'buf', starting at location
+ * 'startptr'. Decrypt the data if it's encrypted and if caller wants it
+ * decrypted.
  *
- * XXX probably this should be improved to suck data directly from the
- * WAL buffers when possible.
+ * XXX probably this should be improved to fetch data directly from the WAL
+ * buffers when possible.
  *
  * Will open, and keep open, one WAL segment stored in the global file
- * descriptor sendFile. This means if XLogRead is used once, there will
- * always be one descriptor left open until the process ends, but never
- * more than one.
+ * descriptor sendFile. This means if XLogRead is used once, there will always
+ * be one descriptor left open until the process ends, but never more than
+ * one.
  */
 static void
-XLogRead(char *buf, XLogRecPtr startptr, Size count)
+XLogRead(char *buf, XLogRecPtr startptr, Size count, bool decrypt)
 {
 	char	   *p;
 	XLogRecPtr	recptr;
@@ -2448,42 +2485,87 @@ retry:
 			sendOff = 0;
 		}
 
-		/* Need to seek in the file? */
-		if (sendOff != startoff)
+		/* Caller should not request decryption of unencrypted data. */
+		Assert(!(decrypt && !data_encrypted));
+
+		if (data_encrypted && decrypt)
 		{
-			if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
-				ereport(ERROR,
-						(errcode_for_file_access(),
-						 errmsg("could not seek in log segment %s to offset %u: %m",
-								XLogFileNameP(curFileTimeLine, sendSegNo),
-								startoff)));
-			sendOff = startoff;
-		}
+			int			pageoff = startoff % XLOG_BLCKSZ;
+			uint32		pagebase = startoff - pageoff;
+			int			bufbytes,
+						bufend,
+						i;
+			char		tweak[TWEAK_SIZE];
 
-		/* How many bytes are within this segment? */
-		if (nbytes > (wal_segment_size - startoff))
-			segbytes = wal_segment_size - startoff;
-		else
-			segbytes = nbytes;
+			/*
+			 * Only accept as much data as what can fit into the buffer.
+			 */
+			if (nbytes > (ENCRYPT_BUF_XLOG_SIZE - pageoff))
+				bufbytes = ENCRYPT_BUF_XLOG_SIZE - pageoff;
+			else
+				bufbytes = nbytes;
+			bufend = pageoff + bufbytes;
 
-		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-		readbytes = read(sendFile, p, segbytes);
-		pgstat_report_wait_end();
-		if (readbytes < 0)
-		{
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read from log segment %s, offset %u, length %zu: %m",
-							XLogFileNameP(curFileTimeLine, sendSegNo),
-							sendOff, (Size) segbytes)));
+			/*
+			 * Read the data, including the leading part of the page which
+			 * caller is not interested in. The tweak we passed to
+			 * encrypt_block() for encryption was for the beginning of the
+			 * block, so it'd be hard to start decryption anywhere else.
+			 */
+			readbytes = 0;
+			while (readbytes < bufend)
+				readbytes += XLogReadBuffer(encrypt_buf_xlog + readbytes,
+											bufend - readbytes,
+											pagebase + readbytes);
+
+			/*
+			 * Decrypt the data one page at a time (the tweak is only valid
+			 * for particular page).
+			 */
+			for (i = 0; i < readbytes; i += XLOG_BLCKSZ)
+			{
+				Size		nencrypt;
+
+				XLogEncryptionTweak(tweak,
+									curFileTimeLine,
+									sendSegNo,
+									pagebase + i);
+
+				/*
+				 * If the last page is not complete, only decrypt the used
+				 * part.
+				 */
+				if ((bufend - i) < XLOG_BLCKSZ)
+					nencrypt = bufend - i;
+				else
+					nencrypt = XLOG_BLCKSZ;
+
+				decrypt_block(encrypt_buf_xlog + i,
+							  encrypt_buf_xlog + i,
+							  nencrypt,
+							  tweak,
+							  true);
+			}
+
+			/*
+			 * Caller does not care that we possibly had to read pageoff bytes
+			 * in addition (because we cannot decrypt trailing part of the
+			 * page alone). This overhead must not affect the accounting.
+			 */
+			readbytes = bufbytes;
+
+			/* Copy the data to the output buffer. */
+			memcpy(p, encrypt_buf_xlog + pageoff, bufbytes);
 		}
-		else if (readbytes == 0)
+		else
 		{
-			ereport(ERROR,
-					(errcode(ERRCODE_DATA_CORRUPTED),
-					 errmsg("could not read from log segment %s, offset %u: read %d of %zu",
-							XLogFileNameP(curFileTimeLine, sendSegNo),
-							sendOff, readbytes, (Size) segbytes)));
+			/* How many bytes are within this segment? */
+			if (nbytes > (wal_segment_size - startoff))
+				segbytes = wal_segment_size - startoff;
+			else
+				segbytes = nbytes;
+
+			readbytes = XLogReadBuffer(p, segbytes, startoff);
 		}
 
 		/* Update state for read */
@@ -2762,7 +2844,7 @@ XLogSendPhysical(void)
 	 * calls.
 	 */
 	enlargeStringInfo(&output_message, nbytes);
-	XLogRead(&output_message.data[output_message.len], startptr, nbytes);
+	XLogRead(&output_message.data[output_message.len], startptr, nbytes, false);
 	output_message.len += nbytes;
 	output_message.data[output_message.len] = '\0';
 
diff --git a/src/backend/storage/file/Makefile b/src/backend/storage/file/Makefile
index ca6a0e4f7d..9f277755fc 100644
--- a/src/backend/storage/file/Makefile
+++ b/src/backend/storage/file/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/storage/file
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = fd.o buffile.o copydir.o reinit.o sharedfileset.o
+OBJS = fd.o buffile.o copydir.o reinit.o sharedfileset.o encryption.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/storage/file/README.encryption b/src/backend/storage/file/README.encryption
new file mode 100644
index 0000000000..b98cb0debd
--- /dev/null
+++ b/src/backend/storage/file/README.encryption
@@ -0,0 +1,198 @@
+src/backend/storage/file/README.encryption
+
+Transparent Cluster Encryption
+==============================
+
+When creating a new cluster (instance), user can choose to have his data
+encrypted on disk. If this feature is active, data is encrypted before it's
+written to disk and decrypted after it has been read, however the data is
+unencrypted in memory (data-at-rest encryption). The following characteristics
+should be considered by anyone who is interested in enabling this feature:
+
+	1. The encryption is transparent from application's point of view.
+
+	2. A single key is used to encrypt the whole cluster.
+
+The full instance encryption feature helps to ensure data confidentiality,
+especially when user cannot rely on confidentiality on filesystem level. On
+the other hand, it does not ensure data integrity, i.e. does not help to
+detect whether an adversary wrote his cipher data to the disk. The block
+cipher methods generally do not protect data integrity, and it'd probably be
+hard anyway because we encrypt Postgres data pages (typically 8 kB) separate
+from each other. If the attacker only changes part of the page, this can be
+detected if data checksums feature is enabled. And specifically for XLOG, each
+XLOG record has its own checksum.
+
+Since the data is stored on a disk, we naturally base our approach on "Disk
+encryption theory" [1]. For each kind of file we use the AES cipher in the
+appropriate mode of operation. The AES cipher itself encrypts / decrypts
+individual blocks ("encryption blocks") of 16 bytes (128 bits), while the mode
+of operation defines the rules how to apply the cipher to data which spans
+multiple encryption blocks.
+
+Relations
+---------
+
+AES cipher in CBC mode [2] is used to encrypt relation files, one relation
+block (page) at a time. The important characteristic of this mode is that, if
+a single encryption block changes, the next run of the encryption produces an
+encrypted page where not only that block, but also all the following blocks
+are encrypted differently. Furthermore, as the Postgres page starts with LSN,
+even if only the last encryption block of the page changes, the whole cipher
+text of the page will be different after the next encryption.
+
+This propagation of changes across encryption blocks makes it harder for
+adversary to deduce what happens in the database. Without the propagation he
+might be able to see data changes with higher granularity. For example, if
+only the beginning of index page changes, it gives some information on the
+keys inserted.
+
+On the other hand, the same change propagation can cause problems if only a
+part of a new version of encrypted page is written to disk (torn page
+write). If such a page is decrypted during crash recovery, part of the plain
+text can become garbage. This can affect even those parts of the page that
+haven't been modified recently, and therefore XLOG records to fix those parts
+are not guaranteed to be available. Therefore, if the cluster encryption is
+enabled, full_page_writes configuration variable must be set too, otherwise
+the server refuses to start. With this setting, the damage described here can
+be fixed by retrieving the full-page-image (FPI) from XLOG and applying it.
+
+However, even if full_page_writes is set, Postgres can still avoid writing FPI
+XLOG record if only hint bit(s) changed on a page, and if wal_log_hints
+configuration variable is not set. The problem with encryption is that even a
+single bit change is propagated to all the following encryption blocks during
+the next encryption of the page. Again, if only part of the changed cipher
+text is written, the remaining portion of the page becomes garbage after
+decryption, and we might not be able to fix it during recovery unless we have
+the FPI in the XLOG. So if encryption is enabled, Postgres behaves as if
+wal_log_hints was always set.
+
+XLOG
+----
+
+The specific problem of XLOG is that record must not be changed once it has
+been flushed to disk. However if we used a block cipher, and if a new XLOG
+record started in an encryption block in which the previous record ends, that
+encryption block would become completely different after the next run of
+encryption. Torn write of such a block (e.g. if memory page boundary crosses
+the encryption block) is likely to make decryption produce garbage, which will
+also appear in the already-flushed record.
+
+Therefore we encrypt XLOG using a stream cipher, or rather block cipher in
+stream mode of operation. Stream cipher uses XOR operation to combine a "key
+stream" with the input stream, and it does not matter if the length of the
+input stream is aligned to any value. In particular, the CTR mode [3] was
+chosen because it allows for both read and write operations to be
+parallelized.
+
+The XLOG is encrypted / decrypted one XLOG page (typically 8 kB) at a time, so
+that we can access the pages independently from each other. Thus, instead of
+being a single long stream, the XLOG is actually a sequence of short streams,
+each of which spans one page.
+
+Since stream ciphers are susceptible to "reused key attack" [4], we must
+ensure that the unused part of the last XLOG page (filled with zeroes) is
+never encrypted.
+
+Temporary files
+---------------
+
+BufFileWrite() and BufFileRead() functions (see buffile.c) hide the encryption
+/ decryption from caller. The encryption / decryption processes one buffer at
+a time so that the buffers can be retrieved independent from each other.
+
+If the encryption is enabled, the following requirements need to be taken into
+account:
+
+1. The file buffer cannot be positioned at arbitrary offset of the file. If
+the encryption routine starts at some position of the file, decryption must
+not start elsewhere because there's no way to determine which initialization
+vector was used internally for the corresponding encryption blocks (of 16
+bytes) during encryption. It makes sense to position the buffer at file offset
+that is whole multiple of buffer size.
+
+2. In general, the useful (written) data does not fill whole multiple of
+encryption blocks, but we must write the whole blocks for decryption to
+succeed. This implies that we need to fill the unused part of the last block
+with zeroes and also remember the amount of useful bytes in the segment file,
+which excludes the padding. (In fact we align the segment file size to file
+buffers instead of encryption blocks, which probably makes the implementation
+a bit simpler.)
+
+Stream cipher might seem like a solution of the padding problem, but we cannot
+use it here because parts of the temporary file can be rewritten. That would
+expose the temporary file to "reused key attack" [4].
+
+Auxiliary files
+---------------
+
+To store other kinds of data encrypted than the ones above, developers are
+advised to use BufFileWriteTransient() and BufFileReadTransient() functions
+(also located in buffile.c). These are especially useful if some data
+structure should be written to an encrypted file and user does not want to
+care whether the next write position is at encryption block boundary. In case
+it is not, BufFileWriteTransient() will read the encryption block the
+structure falls into, decrypt it, add the new data to it and encrypt and write
+it back to the file. Likewise, BufFileReadTransient() ensures that only the
+whole encryption blocks are read from disk and decrypted, whether the next
+read position is at block boundary or not.
+
+Serialization of data changes during logical decoding (reorderbuffer.c) is the
+typical use case for this API.
+
+Initialization vector (IV), encryption tweak
+--------------------------------------------
+
+Besides the input data and the key, both block and stream cipher used for the
+Postgres cluster encryption require an initialization vector. It should make
+analysis of the encrypted data more difficult. Also according to [1], the same
+data should be encrypted differently if located elsewhere on the disk. Term
+"encryption tweak" is more common in the context of disk encryption.
+
+When encrypting relations, each encryption unit (i.e. page) has a unique
+tweak, which consists of relfilenode, buffer number and fork number. As a
+consequence, we need to decrypt page using the existing tweak and encrypt it
+with a new tweak if copying it from one relation to another. Typically this
+happens when a new database is being created from template database, or during
+pg_upgrade. (Therefore the encrypted cluster does not support pg_upgrade with
+the --link option.)
+
+XLOG encryption tweak consists of timeline, segment number and offset at which
+the XLOG page starts in the segment. The "re-encryption" takes place when XLOG
+page is copied from one timeline to another, typically at the end of
+recovery.
+
+As for temporary files, PID of the owning backend, file / fileset number and
+block number (where block is of the same size as a relation page) within the
+file provide sufficient uniqueness, so we use these to generate the tweak.
+
+Auxiliary files can be closed and reopened by another backend, so there's no
+PID strictly associated with them. Therefore we generate the tweak by hashing
+the file path, and appending block number to the hash.
+
+Replication
+-----------
+
+During streaming replication, the walsender process sends the XLOG encrypted
+and walreceiver just writes it. Decryption is performed before the slave
+cluster tries to apply the changes. That implies that the same encryption key
+must be used by both master and slave. If it should be possible someday to stream
+the XLOG unencrypted, one implication is that pg_basebackup should also
+receive the relation data unencrypted.
+
+As for logical replication, the only change introduced by this feature is that
+the XLOG has to be decrypted before the contained data changes can be
+decoded. The data changes are transferred to the subscribing database /
+cluster unencrypted, so both master and slave can use different encryption
+keys.
+
+References
+----------
+
+[1] https://en.wikipedia.org/wiki/Disk_encryption_theory
+
+[2] https://en.wikipedia.org/wiki/Disk_encryption_theory#Cipher-block_chaining_(CBC)
+
+[3] https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)
+
+[4] https://en.wikipedia.org/wiki/Stream_cipher_attacks#Reused_key_attack
diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c
index e9fc11d162..1a5fdaf9e5 100644
--- a/src/backend/storage/file/buffile.c
+++ b/src/backend/storage/file/buffile.c
@@ -43,18 +43,24 @@
 
 #include <unistd.h>
 
+#include "common/sha2.h"
 #include "executor/instrument.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "storage/fd.h"
 #include "storage/buffile.h"
 #include "storage/buf_internals.h"
+#include "storage/encryption.h"
+#include "utils/datetime.h"
 #include "utils/resowner.h"
 
 /*
  * We break BufFiles into gigabyte-sized segments, regardless of RELSEG_SIZE.
  * The reason is that we'd like large BufFiles to be spread across multiple
  * tablespaces when available.
+ *
+ * The number of useful bytes is appended to each segment of shared encrypted
+ * file, see BufFileCommon.useful.
  */
 #define MAX_PHYSICAL_FILESIZE	0x40000000
 #define BUFFILE_SEG_SIZE		(MAX_PHYSICAL_FILESIZE / BLCKSZ)
@@ -81,6 +87,27 @@ typedef struct BufFileCommon
 
 	bool		append;			/* should new data be appended to the end? */
 
+	/*
+	 * If the file is encrypted, only the whole buffer can be loaded / dumped
+	 * --- see BufFileLoadBuffer() for more info --- whether it's space is
+	 * used up or not. Therefore we need to keep track of the actual on-disk
+	 * size buffer of each component file, as it would be if there was no
+	 * encryption.
+	 *
+	 * List would make coding simpler, however it would not be good for
+	 * performance. Random access is important here.
+	 */
+	off_t	   *useful;
+
+	/*
+	 * The "useful" array may need to be expanded independent from
+	 * extendBufFile() (i.e. earlier than the buffer gets dumped), so store
+	 * the number of elements separate from numFiles.
+	 *
+	 * Always 1 for TransientBufFile.
+	 */
+	int			nuseful;
+
 	PGAlignedBlock buffer;
 } BufFileCommon;
 
@@ -97,12 +124,26 @@ struct BufFile
 	/* all files except the last have length exactly MAX_PHYSICAL_FILESIZE */
 	File	   *files;			/* palloc'd array with numFiles entries */
 
+	/*
+	 * Segment number is used to compute encryption tweak so we must remember
+	 * the original numbers of segments if the file is encrypted and if it was
+	 * passed as target to BufFileAppend() at least once. If this field is
+	 * NULL, ->curFile is used to compute the tweak.
+	 */
+	off_t	   *segnos;
+
 	bool		isInterXact;	/* keep open over transactions? */
 
 	SharedFileSet *fileset;		/* space for segment files if shared */
 	const char *name;			/* name of this BufFile if shared */
 
 	/*
+	 * Per-PID identifier if the file is encrypted and not shared. Used for
+	 * tweak computation.
+	 */
+	uint32		number;
+
+	/*
 	 * resowner is the ResourceOwner to use for underlying temp files.  (We
 	 * don't need to remember the memory context we're using explicitly,
 	 * because after creation we only repalloc our arrays larger.)
@@ -132,6 +173,9 @@ static void BufFileDumpBuffer(BufFile *file);
 static int	BufFileFlush(BufFile *file);
 static File MakeNewSharedSegment(BufFile *file, int segment);
 
+static void BufFileTweak(char *tweak, BufFileCommon *file, bool is_transient);
+static void ensureUsefulArraySize(BufFileCommon *file, int required);
+
 static void BufFileLoadBufferTransient(TransientBufFile *file);
 static void BufFileDumpBufferTransient(TransientBufFile *file);
 
@@ -159,6 +203,25 @@ makeBufFileCommon(int nfiles)
 	file->isInterXact = false;
 	file->resowner = CurrentResourceOwner;
 
+	if (data_encrypted)
+	{
+		fcommon->useful = (off_t *) palloc0(sizeof(off_t) * nfiles);
+		fcommon->nuseful = nfiles;
+
+		file->segnos = NULL;
+
+		/*
+		 * The unused (trailing) part of the buffer should not contain
+		 * undefined data: if we encrypt such a buffer and flush it to disk,
+		 * the encrypted form of that "undefined part" can get zeroed due to
+		 * seek and write beyond EOF. If such a buffer gets loaded and
+		 * decrypted, the change of the undefined part to zeroes can affect
+		 * the valid part if it does not end at block boundary. By setting the
+		 * whole buffer to zeroes we ensure that the unused part of the buffer
+		 * always contains zeroes.
+		 */
+		MemSet(fcommon->buffer.data, 0, BLCKSZ);
+	}
 	return file;
 }
 
@@ -204,6 +267,19 @@ extendBufFile(BufFile *file)
 
 	file->files = (File *) repalloc(file->files,
 									(file->numFiles + 1) * sizeof(File));
+
+	if (data_encrypted)
+	{
+		ensureUsefulArraySize(&file->common, file->numFiles + 1);
+
+		if (file->segnos)
+		{
+			file->segnos = (off_t *) repalloc(file->segnos,
+											  (file->numFiles + 1) * sizeof(off_t));
+			file->segnos[file->numFiles] = file->numFiles;
+		}
+	}
+
 	file->files[file->numFiles] = pfile;
 	file->numFiles++;
 }
@@ -226,10 +302,16 @@ BufFileCreateTemp(bool interXact)
 	BufFile    *file;
 	File		pfile;
 
+	static uint32 counter_temp = 0;
+
 	pfile = OpenTemporaryFile(interXact);
 	Assert(pfile >= 0);
 
 	file = makeBufFile(pfile);
+
+	file->number = counter_temp;
+	counter_temp = (counter_temp + 1) % INT_MAX;
+
 	file->isInterXact = interXact;
 
 	return file;
@@ -333,6 +415,7 @@ BufFileOpenShared(SharedFileSet *fileset, const char *name)
 		files[nfiles] = SharedFileSetOpen(fileset, segment_name);
 		if (files[nfiles] <= 0)
 			break;
+
 		++nfiles;
 
 		CHECK_FOR_INTERRUPTS();
@@ -349,7 +432,51 @@ BufFileOpenShared(SharedFileSet *fileset, const char *name)
 						segment_name, name)));
 
 	file = makeBufFileCommon(nfiles);
+
+	/*
+	 * Shared encrypted segment should, at its end, contain information on the
+	 * number of useful bytes in the last buffer.
+	 */
+	if (data_encrypted)
+	{
+		off_t		pos;
+		int			i;
+
+		for (i = 0; i < nfiles; i++)
+		{
+			int			nbytes;
+			File		segment = files[i];
+
+			pos = FileSize(segment) - sizeof(off_t);
+
+			/*
+			 * The word must immediately follow the last buffer of the
+			 * segment.
+			 */
+			if (pos <= 0 || pos % BLCKSZ != 0)
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not find padding info in BufFile \"%s\": %m",
+								name)));
+
+			nbytes = FileRead(segment, (char *) &file->common.useful[i],
+							  sizeof(off_t), pos, WAIT_EVENT_BUFFILE_READ);
+			if (nbytes != sizeof(off_t))
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not read padding info from BufFile \"%s\": %m",
+								name)));
+			Assert(file->common.useful[i] > 0);
+
+			CHECK_FOR_INTERRUPTS();
+		}
+	}
+
 	file->files = files;
+
+	if (data_encrypted)
+		file->common.nuseful = nfiles;
+
 	file->common.readOnly = true;	/* Can't write to files opened this way */
 	file->fileset = fileset;
 	file->name = pstrdup(name);
@@ -428,6 +555,14 @@ BufFileClose(BufFile *file)
 		FileClose(file->files[i]);
 	/* release the buffer space */
 	pfree(file->files);
+
+	if (data_encrypted)
+	{
+		if (file->segnos)
+			pfree(file->segnos);
+		pfree(file->common.useful);
+	}
+
 	pfree(file);
 }
 
@@ -444,6 +579,14 @@ BufFileLoadBuffer(BufFile *file)
 	File		thisfile;
 
 	/*
+	 * Only whole multiple of ENCRYPTION_BLOCK can be encrypted / decrypted,
+	 * see comments in BufFileDumpBuffer().
+	 */
+	Assert((file->common.curOffset % BLCKSZ == 0 &&
+			file->common.curOffset % ENCRYPTION_BLOCK == 0) ||
+		   !data_encrypted);
+
+	/*
 	 * Advance to next component file if necessary and possible.
 	 */
 	if (file->common.curOffset >= MAX_PHYSICAL_FILESIZE &&
@@ -454,6 +597,20 @@ BufFileLoadBuffer(BufFile *file)
 	}
 
 	/*
+	 * See makeBufFileCommon().
+	 *
+	 * Actually here we only handle the case of FileRead() returning zero
+	 * bytes below. In contrast, if the buffer contains any data but it's not
+	 * full, it should already have the trailing zeroes (encrypted) on disk.
+	 * And as the encrypted buffer is always loaded in its entirety (i.e. EOF
+	 * should only appear at buffer boundary if the data is encrypted), all
+	 * unused bytes of the buffer should eventually be zeroes after
+	 * decryption.
+	 */
+	if (data_encrypted)
+		MemSet(file->common.buffer.data, 0, BLCKSZ);
+
+	/*
 	 * Read whatever we can get, up to a full bufferload.
 	 */
 	thisfile = file->files[file->common.curFile];
@@ -466,6 +623,54 @@ BufFileLoadBuffer(BufFile *file)
 		file->common.nbytes = 0;
 	/* we choose not to advance curOffset here */
 
+	if (data_encrypted && file->common.nbytes > 0)
+	{
+		char		tweak[TWEAK_SIZE];
+		int			nbytes = file->common.nbytes;
+
+		/*
+		 * The encrypted component file can only consist of whole number of
+		 * our encryption units. (Only the whole buffers are dumped / loaded.)
+		 * The only exception is that we're at the end of segment file and
+		 * found the word indicating the number of useful bytes in the
+		 * segment. This can only happen for shared file.
+		 */
+		if (nbytes % BLCKSZ != 0)
+		{
+			Assert(nbytes == sizeof(off_t) && file->fileset != NULL);
+
+			/*
+			 * This metadata is hidden from the caller; all it needs to know is
+			 * that there's no real data at the end of the file.
+			 */
+			file->common.nbytes = 0;
+			return;
+		}
+
+		/* Decrypt the whole block at once. */
+		BufFileTweak(tweak, &file->common, false);
+		decrypt_block(file->common.buffer.data,
+					  file->common.buffer.data,
+					  BLCKSZ,
+					  tweak,
+					  false);
+
+#ifdef	USE_ASSERT_CHECKING
+
+		/*
+		 * The unused part of the buffer which we've read from disk and
+		 * decrypted should only contain zeroes, as explained in front of the
+		 * MemSet() call.
+		 */
+		{
+			int			i;
+
+			for (i = file->common.nbytes; i < BLCKSZ; i++)
+				Assert(file->common.buffer.data[i] == 0);
+		}
+#endif							/* USE_ASSERT_CHECKING */
+	}
+
 	if (file->common.nbytes > 0)
 		pgBufferUsage.temp_blks_read++;
 }
@@ -476,6 +681,9 @@ BufFileLoadBuffer(BufFile *file)
  * Dump buffer contents starting at curOffset.
  * At call, should have dirty = true, nbytes > 0.
  * On exit, dirty is cleared if successful write, and curOffset is advanced.
+ *
+ * XXX Consider separate function for encrypted buffer. (No loop is needed to
+ * dump the encrypted buffer.)
  */
 static void
 BufFileDumpBuffer(BufFile *file)
@@ -485,12 +693,24 @@ BufFileDumpBuffer(BufFile *file)
 	File		thisfile;
 
 	/*
+	 * See comments in BufFileLoadBuffer();
+	 */
+	Assert((file->common.curOffset % BLCKSZ == 0 &&
+			file->common.curOffset % ENCRYPTION_BLOCK == 0) ||
+		   !data_encrypted);
+
+	/*
+	 * Caller's responsibility.
+	 */
+	Assert(file->common.pos <= file->common.nbytes);
+
+	/*
 	 * Unlike BufFileLoadBuffer, we must dump the whole buffer even if it
 	 * crosses a component-file boundary; so we need a loop.
 	 */
 	while (wpos < file->common.nbytes)
 	{
-		off_t		availbytes;
+		char	   *write_ptr;
 
 		/*
 		 * Advance to next component file if necessary and possible.
@@ -503,23 +723,71 @@ BufFileDumpBuffer(BufFile *file)
 			file->common.curOffset = 0L;
 		}
 
-		/*
-		 * Determine how much we need to write into this file.
-		 */
-		bytestowrite = file->common.nbytes - wpos;
-		availbytes = MAX_PHYSICAL_FILESIZE - file->common.curOffset;
+		if (!data_encrypted)
+		{
+			off_t		availbytes;
+
+			bytestowrite = file->common.nbytes - wpos;
+			availbytes = MAX_PHYSICAL_FILESIZE - file->common.curOffset;
 
-		if ((off_t) bytestowrite > availbytes)
-			bytestowrite = (int) availbytes;
+			if ((off_t) bytestowrite > availbytes)
+				bytestowrite = (int) availbytes;
+
+			write_ptr = file->common.buffer.data;
+		}
+		else
+		{
+			char		tweak[TWEAK_SIZE];
+
+			/*
+			 * This condition plus the alignment of curOffset to BLCKSZ
+			 * (checked above) ensure that the encrypted buffer never crosses
+			 * component file boundary.
+			 */
+			StaticAssertStmt((MAX_PHYSICAL_FILESIZE % BLCKSZ) == 0,
+							 "BLCKSZ is not whole multiple of MAX_PHYSICAL_FILESIZE");
+
+			/*
+			 * Encrypted data is dumped all at once.
+			 *
+			 * Here we don't have to check availbytes because --- according to
+			 * the assertions above --- curOffset should be lower than
+			 * MAX_PHYSICAL_FILESIZE by non-zero multiple of BLCKSZ.
+			 */
+			bytestowrite = BLCKSZ;
+
+			/*
+			 * The amount of data encrypted must be a multiple of
+			 * ENCRYPTION_BLOCK. We meet this condition simply by encrypting
+			 * the whole buffer.
+			 *
+			 * XXX Alternatively we could encrypt only as many encryption
+			 * blocks as encompass file->common.nbytes bytes, but then we'd
+			 * have to care how many blocks should be decrypted: decryption of
+			 * the unencrypted trailing zeroes produces garbage, which can be
+			 * a problem if lseek() created "holes" in the file. Such a hole
+			 * should be read as a sequence of zeroes.
+			 */
+			BufFileTweak(tweak, &file->common, false);
+
+			encrypt_block(file->common.buffer.data,
+						  encrypt_buf.data,
+						  BLCKSZ,
+						  tweak,
+						  false);
+			write_ptr = encrypt_buf.data;
+		}
 
 		thisfile = file->files[file->common.curFile];
 		bytestowrite = FileWrite(thisfile,
-								 file->common.buffer.data + wpos,
+								 write_ptr + wpos,
 								 bytestowrite,
 								 file->common.curOffset,
 								 WAIT_EVENT_BUFFILE_WRITE);
-		if (bytestowrite <= 0)
+		if (bytestowrite <= 0 ||
+			(data_encrypted && bytestowrite != BLCKSZ))
 			return;				/* failed to write */
+
 		file->common.curOffset += bytestowrite;
 		wpos += bytestowrite;
 
@@ -527,25 +795,115 @@ BufFileDumpBuffer(BufFile *file)
 	}
 	file->common.dirty = false;
 
-	/*
-	 * At this point, curOffset has been advanced to the end of the buffer,
-	 * ie, its original value + nbytes.  We need to make it point to the
-	 * logical file position, ie, original value + pos, in case that is less
-	 * (as could happen due to a small backwards seek in a dirty buffer!)
-	 */
-	file->common.curOffset -= (file->common.nbytes - file->common.pos);
-	if (file->common.curOffset < 0) /* handle possible segment crossing */
+	if (!data_encrypted)
 	{
-		file->common.curFile--;
-		Assert(file->common.curFile >= 0);
-		file->common.curOffset += MAX_PHYSICAL_FILESIZE;
+		/*
+		 * At this point, curOffset has been advanced to the end of the
+		 * buffer, ie, its original value + nbytes.  We need to make it point
+		 * to the logical file position, ie, original value + pos, in case
+		 * that is less (as could happen due to a small backwards seek in a
+		 * dirty buffer!)
+		 */
+		file->common.curOffset -= (file->common.nbytes - file->common.pos);
+		if (file->common.curOffset < 0) /* handle possible segment crossing */
+		{
+			file->common.curFile--;
+			Assert(file->common.curFile >= 0);
+			file->common.curOffset += MAX_PHYSICAL_FILESIZE;
+		}
+
+		/*
+		 * Now we can set the buffer empty without changing the logical
+		 * position
+		 */
+		file->common.pos = 0;
+		file->common.nbytes = 0;
 	}
+	else
+	{
+		/*
+		 * curOffset should be at buffer boundary and buffer is the smallest
+		 * I/O unit for encrypted data.
+		 */
+		Assert(file->common.curOffset % BLCKSZ == 0);
 
-	/*
-	 * Now we can set the buffer empty without changing the logical position
-	 */
-	file->common.pos = 0;
-	file->common.nbytes = 0;
+		/*
+		 * The number of useful bytes needs to be written at the end of each
+		 * encrypted segment of a shared file so that the other backends know
+		 * how many bytes of the last buffer are useful.
+		 */
+		if (file->fileset != NULL)
+		{
+			off_t		useful;
+
+			/*
+			 * nuseful may be increased earlier than numFiles but not later,
+			 * so the corresponding entry should always exist in ->useful.
+			 */
+			Assert(file->common.curFile < file->common.nuseful);
+
+			/*
+			 * The number of useful bytes in the current segment file.
+			 */
+			useful = file->common.useful[file->common.curFile];
+
+			/*
+			 * Have we dumped the last buffer of the segment, i.e. the one
+			 * that can contain padding?
+			 */
+			if (file->common.curOffset >= useful)
+			{
+				int			bytes_extra;
+
+				/*
+				 * Write the number of useful bytes in the segment.
+				 *
+				 * Do not increase curOffset afterwards. Thus we ensure that
+				 * the next buffer appended will overwrite the "useful" value
+				 * just written, instead of being appended to it.
+				 */
+				bytes_extra = FileWrite(file->files[file->common.curFile],
+										(char *) &useful,
+										sizeof(useful),
+										file->common.curOffset,
+										WAIT_EVENT_BUFFILE_WRITE);
+				if (bytes_extra != sizeof(useful))
+					return;		/* failed to write */
+			}
+		}
+
+		if (file->common.pos >= BLCKSZ)
+		{
+			Assert(file->common.pos == BLCKSZ);
+
+			/*
+			 * curOffset points to the beginning of the next buffer, so just
+			 * reset pos and nbytes.
+			 */
+			file->common.pos = 0;
+			file->common.nbytes = 0;
+
+			/* See makeBufFile() */
+			if (data_encrypted)
+				MemSet(file->common.buffer.data, 0, BLCKSZ);
+		}
+		else
+		{
+			/*
+			 * Move curOffset to the beginning of the just-written buffer and
+			 * preserve pos.
+			 */
+			file->common.curOffset -= BLCKSZ;
+
+			/*
+			 * At least pos bytes should be written even if the first change
+			 * since now appears at pos == nbytes, but in fact the whole
+			 * buffer will be written regardless of pos. This is the price we
+			 * pay for choosing BLCKSZ as the I/O unit for encrypted data.
+			 */
+			file->common.nbytes = BLCKSZ;
+		}
+	}
 }
 
 /*
@@ -676,9 +1034,43 @@ BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
 		return EOF;
 	/* Seek is OK! */
 	file->common.curFile = newFile;
-	file->common.curOffset = newOffset;
-	file->common.pos = 0;
-	file->common.nbytes = 0;
+	if (!data_encrypted)
+	{
+		file->common.curOffset = newOffset;
+		file->common.pos = 0;
+		file->common.nbytes = 0;
+	}
+	else
+	{
+		/*
+		 * Offset of an encrypted buffer must be a multiple of BLCKSZ.
+		 */
+		file->common.pos = newOffset % BLCKSZ;
+		file->common.curOffset = newOffset - file->common.pos;
+
+		/*
+		 * BufFileLoadBuffer() will set nbytes iff it can read something.
+		 */
+		file->common.nbytes = 0;
+
+		/*
+		 * Load and decrypt the existing part of the buffer.
+		 */
+		BufFileLoadBuffer(file);
+		if (file->common.nbytes == 0)
+		{
+			/*
+			 * The data requested is not in the file, but this is not an
+			 * error.
+			 */
+			return 0;
+		}
+
+		/*
+		 * The whole buffer should have been loaded.
+		 */
+		Assert(file->common.nbytes == BLCKSZ);
+	}
 	return 0;
 }
 
@@ -709,6 +1101,123 @@ BufFileSeekBlock(BufFile *file, long blknum)
 					   SEEK_SET);
 }
 
+/*
+ * Compute the encryption tweak of the buffer located at file->curOffset and
+ * store it in "tweak", which must have room for TWEAK_SIZE bytes.
+ *
+ * is_transient tells if "file" is TransientBufFile rather than BufFile.
+ */
+static void
+BufFileTweak(char *tweak, BufFileCommon *file, bool is_transient)
+{
+	off_t		block;
+
+	/* The unused bytes should always be defined. */
+	memset(tweak, 0, TWEAK_SIZE);
+
+	if (!is_transient)
+	{
+		BufFile    *tmpfile = (BufFile *) file;
+		pid_t		pid;
+		uint32		number;
+		int			curFile = file->curFile;
+
+		if (tmpfile->fileset)
+		{
+			pid = tmpfile->fileset->creator_pid;
+			number = tmpfile->fileset->number;
+		}
+		else
+		{
+			pid = MyProcPid;
+			number = tmpfile->number;
+		}
+
+		/*
+		 * If the file was produced by BufFileAppend(), we need the original
+		 * curFile, as it was used originally for encryption.
+		 */
+		if (tmpfile->segnos)
+			curFile = tmpfile->segnos[curFile];
+
+		/* Multiply in off_t so that a high segment number cannot overflow. */
+		block = (off_t) curFile * BUFFILE_SEG_SIZE + file->curOffset / BLCKSZ;
+
+		StaticAssertStmt(sizeof(pid) + sizeof(number) + sizeof(block) <=
+						 TWEAK_SIZE,
+						 "tweak components do not fit into TWEAK_SIZE");
+
+		/*
+		 * The tweak consists of PID of the owning backend (the leader backend
+		 * in the case of parallel query processing), number within the PID
+		 * and block number. The components are copied with memcpy() because
+		 * callers pass a plain char array, which has no alignment guarantee.
+		 *
+		 * XXX Additional flag would be handy to distinguish local file from
+		 * shared one. Since there's no more room within TWEAK_SIZE, should we
+		 * use the highest bit in one of the existing components (preferably
+		 * other than pid so that tweaks of different processes do not become
+		 * identical)?
+		 */
+		memcpy(tweak, &pid, sizeof(pid));
+		memcpy(tweak + sizeof(pid), &number, sizeof(number));
+		memcpy(tweak + sizeof(pid) + sizeof(number), &block, sizeof(block));
+	}
+	else
+	{
+		TransientBufFile *transfile = (TransientBufFile *) file;
+		pg_sha256_ctx sha_ctx;
+		unsigned char sha[PG_SHA256_DIGEST_LENGTH];
+#define BUF_FILE_PATH_HASH_LEN	8
+
+		/*
+		 * For transient file we can't use any field of TransientBufFile
+		 * because this info gets lost if the file is closed and reopened.
+		 * Hash of the file path string is an easy way to get "persistent"
+		 * tweak value, however usual hashes do not fit into TWEAK_SIZE. The
+		 * hash portion that we can store is actually even smaller because
+		 * block number needs to be stored too. Even though we only use part
+		 * of the hash, the tweak we finally use for data encryption /
+		 * decryption should not be predictable because the tweak we compute
+		 * here is further processed, see cbc_essi_preprocess_tweak().
+		 */
+		pg_sha256_init(&sha_ctx);
+		pg_sha256_update(&sha_ctx, (uint8 *) transfile->path,
+						 strlen(transfile->path));
+		pg_sha256_final(&sha_ctx, sha);
+
+		StaticAssertStmt(BUF_FILE_PATH_HASH_LEN + sizeof(block) <= TWEAK_SIZE,
+						 "tweak components do not fit into TWEAK_SIZE");
+		memcpy(tweak, sha, BUF_FILE_PATH_HASH_LEN);
+		block = file->curOffset / BLCKSZ;
+		memcpy(tweak + BUF_FILE_PATH_HASH_LEN, &block, sizeof(block));
+	}
+}
+
+/*
+ * Grow BufFileCommon.useful so that it holds at least "required" entries.
+ * The newly added entry is initialized to zero.
+ */
+static void
+ensureUsefulArraySize(BufFileCommon *file, int required)
+{
+	int			oldsize = file->nuseful;
+
+	if (oldsize < required)
+	{
+		/*
+		 * It shouldn't be possible to jump beyond the end of the last
+		 * segment, so the array can only grow by a single entry at a time.
+		 */
+		Assert(required == oldsize + 1);
+
+		file->useful = (off_t *) repalloc(file->useful,
+										  required * sizeof(off_t));
+		file->useful[oldsize] = 0L;
+		file->nuseful = oldsize + 1;
+	}
+}
+
 #ifdef NOT_USED
 /*
  * BufFileTellBlock --- block-oriented tell
@@ -740,14 +1249,32 @@ BufFileSize(BufFile *file)
 
 	Assert(file->fileset != NULL);
 
-	/* Get the size of the last physical file. */
-	lastFileSize = FileSize(file->files[file->numFiles - 1]);
-	if (lastFileSize < 0)
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not determine size of temporary file \"%s\" from BufFile \"%s\": %m",
-						FilePathName(file->files[file->numFiles - 1]),
-						file->name)));
+	if (data_encrypted)
+	{
+		/*
+		 * "useful" should be initialized even for shared file, see
+		 * BufFileOpenShared().
+		 */
+		Assert(file->common.useful != NULL &&
+			   file->common.nuseful >= file->numFiles);
+
+		/*
+		 * The number of useful bytes in the segment is what caller is
+		 * interested in.
+		 */
+		lastFileSize = file->common.useful[file->common.nuseful - 1];
+	}
+	else
+	{
+		/* Get the size of the last physical file. */
+		lastFileSize = FileSize(file->files[file->numFiles - 1]);
+		if (lastFileSize < 0)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not determine size of temporary file \"%s\" from BufFile \"%s\": %m",
+							FilePathName(file->files[file->numFiles - 1]),
+							file->name)));
+	}
 
 	return ((file->numFiles - 1) * (int64) MAX_PHYSICAL_FILESIZE) +
 		lastFileSize;
@@ -777,6 +1304,7 @@ BufFileAppend(BufFile *target, BufFile *source)
 {
 	long		startBlock = target->numFiles * BUFFILE_SEG_SIZE;
 	int			newNumFiles = target->numFiles + source->numFiles;
+	int			newNUseful = target->common.nuseful + source->common.nuseful;
 	int			i;
 
 	Assert(target->fileset != NULL);
@@ -791,6 +1319,63 @@ BufFileAppend(BufFile *target, BufFile *source)
 		repalloc(target->files, sizeof(File) * newNumFiles);
 	for (i = target->numFiles; i < newNumFiles; i++)
 		target->files[i] = source->files[i - target->numFiles];
+
+	if (data_encrypted)
+	{
+		/*
+		 * XXX As the typical use case is that parallel workers expose file to
+		 * the leader, can we expect both target and source to have been
+		 * exported, i.e. flushed? In such a case "nuseful" would have to be
+		 * equal to "numFiles" for both input files and the code could get a
+		 * bit simpler. It seems that at least source should be flushed, as
+		 * source->readOnly is expected to be true above.
+		 */
+		target->common.useful = (off_t *)
+			repalloc(target->common.useful, sizeof(off_t) * newNUseful);
+
+		for (i = target->common.nuseful; i < newNUseful; i++)
+			target->common.useful[i] = source->common.useful[i - target->common.nuseful];
+		target->common.nuseful = newNUseful;
+
+		/*
+		 * File segments can appear at different positions due to
+		 * concatenation, so make sure we remember the original positions for
+		 * the sake of encryption tweak.
+		 */
+		if (target->segnos == NULL)
+		{
+			/*
+			 * If the target does not have the array yet, allocate it for both
+			 * target and source and initialize the target part.
+			 */
+			target->segnos = (off_t *) palloc(newNumFiles * sizeof(off_t));
+			for (i = 0; i < target->numFiles; i++)
+				target->segnos[i] = i;
+		}
+		else
+		{
+			/*
+			 * Use the existing target part and add space for the source part.
+			 */
+			target->segnos = (off_t *) repalloc(target->segnos,
+												newNumFiles * sizeof(off_t));
+		}
+
+		/*
+		 * The source segment number either equals the (0-based) index of the
+		 * segment, or to an element of an already existing array.
+		 */
+		for (i = target->numFiles; i < newNumFiles; i++)
+		{
+			off_t		segno = i - target->numFiles;
+
+			if (source->segnos == NULL)
+				target->segnos[i] = segno;
+			else
+				target->segnos[i] = source->segnos[segno];
+		}
+	}
+
 	target->numFiles = newNumFiles;
 
 	return startBlock;
@@ -817,6 +1402,28 @@ BufFileOpenTransient(const char *path, int fileFlags)
 	/* Check whether user wants read or write access. */
 	readOnly = (fileFlags & O_WRONLY) == 0;
 
+	if (data_encrypted)
+	{
+		/*
+		 * In the encryption case, even if user will only be allowed to write,
+		 * internally we also need to read, see below.
+		 */
+		fileFlags &= ~O_WRONLY;
+		fileFlags |= O_RDWR;
+
+		/*
+		 * We can only emulate the append behavior by setting curOffset to
+		 * file size because if the underlying file was opened in append mode,
+		 * we could not rewrite the old value of file->common.useful[0] with
+		 * data.
+		 */
+		if (fileFlags & O_APPEND)
+		{
+			append = true;
+			fileFlags &= ~O_APPEND;
+		}
+	}
+
 	/*
 	 * Append mode for read access is not useful, so don't bother implementing
 	 * it.
@@ -849,6 +1456,8 @@ BufFileOpenTransient(const char *path, int fileFlags)
 	fcommon->readOnly = readOnly;
 	fcommon->append = append;
 	fcommon->curFile = 0;
+	fcommon->useful = (off_t *) palloc0(sizeof(off_t));
+	fcommon->nuseful = 1;
 
 	file->path = pstrdup(path);
 	file->fd = fd;
@@ -869,6 +1478,56 @@ BufFileOpenTransient(const char *path, int fileFlags)
 	else
 		fcommon->curOffset = 0L;
 
+	/*
+	 * Encrypted transient file should, at its end, contain information on the
+	 * number of useful bytes in the last buffer.
+	 */
+	if (data_encrypted)
+	{
+		off_t		pos = size;
+		int			nbytes;
+
+		/* No metadata in an empty file. */
+		if (pos == 0)
+			return file;
+
+		pos -= sizeof(off_t);
+
+		/*
+		 * The word must immediately follow the last buffer of the segment.
+		 */
+		if (pos < 0 || pos % BLCKSZ != 0)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not find padding info in TransientBufFile \"%s\": %m",
+							path)));
+
+		errno = 0;
+		nbytes = pg_pread(file->fd,
+						  (char *) &fcommon->useful[0],
+						  sizeof(off_t),
+						  pos);
+		if (nbytes != sizeof(off_t))
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read padding info from TransientBufFile \"%s\": %m",
+							path)));
+		Assert(fcommon->useful[0] > 0);
+
+		if (fcommon->append)
+		{
+			off_t		useful = fcommon->useful[0];
+
+			/*
+			 * If new buffer should be added, make sure it will end up
+			 * immediately after the last complete one, and also that the next
+			 * write position follows the last valid byte.
+			 */
+			fcommon->pos = useful % BLCKSZ;
+			fcommon->curOffset = useful - fcommon->pos;
+		}
+	}
+
 	return file;
 }
 
@@ -900,6 +1559,8 @@ BufFileCloseTransient(TransientBufFile *file)
 				(errcode_for_file_access(),
 				 errmsg("could not close file \"%s\": %m", file->path)));
 
+	if (data_encrypted)
+		pfree(file->common.useful);
 	pfree(file->path);
 	pfree(file);
 }
@@ -916,6 +1577,9 @@ BufFileLoadBufferTransient(TransientBufFile *file)
 	Assert(!file->common.dirty);
 	Assert(file->common.pos == 0 && file->common.nbytes == 0);
 
+	/* See comments in BufFileLoadBuffer(). */
+	if (data_encrypted)
+		MemSet(file->common.buffer.data, 0, BLCKSZ);
 retry:
 
 	/*
@@ -940,6 +1604,54 @@ retry:
 		return;
 	}
 	/* we choose not to advance offset here */
+
+	if (data_encrypted && file->common.nbytes > 0)
+	{
+		char		tweak[TWEAK_SIZE];
+		int			nbytes = file->common.nbytes;
+
+		/*
+		 * The encrypted file can only consist of whole number of our
+		 * encryption units. (Only the whole buffers are dumped / loaded.) The
+		 * only exception is that we're at the end of segment file and found
+		 * the word indicating the number of useful bytes in the file (see
+		 * BufFileDumpBufferTransient()).
+		 */
+		if (nbytes % BLCKSZ != 0)
+		{
+			Assert(nbytes == sizeof(off_t));
+
+			/*
+			 * This metadata is hidden from the caller, so all the caller needs
+			 * to know is that there's no real data at the end of the file.
+			 */
+			file->common.nbytes = 0;
+			return;
+		}
+
+		/* Decrypt the whole block at once. */
+		BufFileTweak(tweak, &file->common, true);
+		decrypt_block(file->common.buffer.data,
+					  file->common.buffer.data,
+					  BLCKSZ,
+					  tweak,
+					  false);
+
+#ifdef	USE_ASSERT_CHECKING
+
+		/*
+		 * The unused part of the buffer which we've read from disk and
+		 * decrypted should only contain zeroes, as explained in front of the
+		 * MemSet() call.
+		 */
+		{
+			int			i;
+
+			for (i = file->common.nbytes; i < BLCKSZ; i++)
+				Assert(file->common.buffer.data[i] == 0);
+		}
+#endif							/* USE_ASSERT_CHECKING */
+	}
 }
 
 /*
@@ -948,7 +1660,9 @@ retry:
 static void
 BufFileDumpBufferTransient(TransientBufFile *file)
 {
-	int			nwritten;
+	int			bytestowrite,
+				nwritten;
+	char	   *write_ptr;
 
 	/* This function should only be needed during write access ... */
 	Assert(!file->common.readOnly);
@@ -957,12 +1671,33 @@ BufFileDumpBufferTransient(TransientBufFile *file)
 	Assert(file->common.dirty);
 	Assert(file->common.nbytes > 0);
 
+	if (!data_encrypted)
+	{
+		write_ptr = file->common.buffer.data;
+		bytestowrite = file->common.nbytes;
+	}
+	else
+	{
+		char		tweak[TWEAK_SIZE];
+
+		/*
+		 * Encrypt the whole buffer, see comments in BufFileDumpBuffer().
+		 */
+		BufFileTweak(tweak, &file->common, true);
+		encrypt_block(file->common.buffer.data,
+					  encrypt_buf.data,
+					  BLCKSZ,
+					  tweak,
+					  false);
+		write_ptr = encrypt_buf.data;
+		bytestowrite = BLCKSZ;
+	}
 retry:
 	errno = 0;
 	pgstat_report_wait_start(WAIT_EVENT_BUFFILE_WRITE);
 	nwritten = pg_pwrite(file->fd,
-						 file->common.buffer.data,
-						 file->common.nbytes,
+						 write_ptr,
+						 bytestowrite,
 						 file->common.curOffset);
 	pgstat_report_wait_end();
 
@@ -981,6 +1716,43 @@ retry:
 		return;					/* failed to write */
 	}
 
+	file->common.curOffset += nwritten;
+
+	if (data_encrypted)
+	{
+		off_t		useful;
+
+		/*
+		 * The number of useful bytes in file.
+		 */
+		useful = file->common.useful[0];
+
+		/*
+		 * Have we dumped the last buffer of the segment, i.e. the one that
+		 * can contain padding?
+		 */
+		if (file->common.curOffset >= useful)
+		{
+			int			bytes_extra;
+
+			/*
+			 * Write the number of useful bytes in the file
+			 *
+			 * Do not increase curOffset afterwards. Thus we ensure that the
+			 * next buffer appended will overwrite the "useful" value just
+			 * written, instead of being appended to it.
+			 */
+			pgstat_report_wait_start(WAIT_EVENT_BUFFILE_WRITE);
+			bytes_extra = pg_pwrite(file->fd,
+									(char *) &useful,
+									sizeof(useful),
+									file->common.curOffset);
+			pgstat_report_wait_end();
+			if (bytes_extra != sizeof(useful))
+				return;			/* failed to write */
+		}
+	}
+
 	file->common.dirty = false;
 
 	file->common.pos = 0;
@@ -1034,21 +1806,110 @@ BufFileReadCommon(BufFileCommon *file, void *ptr, size_t size,
 	{
 		if (file->pos >= file->nbytes)
 		{
-			/* Try to load more data into buffer. */
-			file->curOffset += file->pos;
-			file->pos = 0;
+			/*
+			 * Neither read nor write nor seek should leave pos greater than
+			 * nbytes, whether the data is encrypted or not. pos can only
+			 * be greater if nbytes is zero --- this situation can be caused
+			 * by BufFileSeek().
+			 */
+			Assert(file->pos == file->nbytes || file->nbytes == 0);
+
+			/*
+			 * The Assert() above implies that pos is a whole multiple of
+			 * BLCKSZ, so curOffset has to meet the same encryption-specific
+			 * requirement too.
+			 */
+			Assert(file->curOffset % BLCKSZ == 0 || !data_encrypted);
+
 			file->nbytes = 0;
+			/* Try to load more data into buffer. */
+			if (!data_encrypted || file->pos % BLCKSZ == 0)
+			{
+				file->curOffset += file->pos;
+				file->pos = 0;
 
-			if (!is_transient)
-				BufFileLoadBuffer((BufFile *) file);
+				if (!is_transient)
+					BufFileLoadBuffer((BufFile *) file);
+				else
+					BufFileLoadBufferTransient((TransientBufFile *) file);
+
+				if (file->nbytes <= 0)
+					break;		/* no more data available */
+			}
 			else
-				BufFileLoadBufferTransient((TransientBufFile *) file);
+			{
+				int			nbytes_orig = file->nbytes;
+
+				/*
+				 * Given that BLCKSZ is the I/O unit for encrypted data (see
+				 * comments in BufFileDumpBuffer), we cannot add pos to
+				 * curOffset because that would make it point outside block
+				 * boundary. The only thing we can do is to reload the whole
+				 * buffer and see if more data is eventually there than the
+				 * previous load has fetched.
+				 */
+				if (!is_transient)
+					BufFileLoadBuffer((BufFile *) file);
+				else
+					BufFileLoadBufferTransient((TransientBufFile *) file);
 
-			if (file->nbytes <= 0)
-				break;			/* no more data available */
+				Assert(file->nbytes >= nbytes_orig);
+				if (file->nbytes == nbytes_orig)
+					break;		/* no more data available */
+			}
 		}
 
 		nthistime = file->nbytes - file->pos;
+
+		/*
+		 * The buffer can contain trailing zeroes because BLCKSZ is the I/O
+		 * unit for encrypted data. These are not available for reading.
+		 */
+		if (data_encrypted)
+		{
+			off_t		useful = file->useful[file->curFile];
+
+			/*
+			 * The criterion is whether the useful data ends within the
+			 * currently loaded buffer.
+			 */
+			if (useful < file->curOffset + BLCKSZ)
+			{
+				int			avail;
+
+				/*
+				 * Compute the number of bytes available in the current
+				 * buffer.
+				 */
+				avail = useful - file->curOffset;
+				Assert(avail >= 0);
+
+				/*
+				 * An empty buffer can exist, e.g. after a seek to the end of
+				 * the last component file.
+				 */
+				if (avail == 0)
+					break;
+
+				/*
+				 * Seek beyond the current EOF, which was not followed by
+				 * write, could have resulted in position outside the useful
+				 * data
+				 */
+				if (file->pos > avail)
+					break;
+
+				nthistime = avail - file->pos;
+				Assert(nthistime >= 0);
+
+				/*
+				 * Have we reached the end of the valid data?
+				 */
+				if (nthistime == 0)
+					break;
+			}
+		}
+
 		if (nthistime > size)
 			nthistime = size;
 		Assert(nthistime > 0);
@@ -1070,8 +1931,7 @@ BufFileReadCommon(BufFileCommon *file, void *ptr, size_t size,
  * Functionality needed by both BufFileWrite() and BufFileWriteTransient().
  */
 static size_t
-BufFileWriteCommon(BufFileCommon *file, void *ptr, size_t size,
-				   bool is_transient)
+BufFileWriteCommon(BufFileCommon *file, void *ptr, size_t size, bool is_transient)
 {
 	size_t		nwritten = 0;
 	size_t		nthistime;
@@ -1097,11 +1957,28 @@ BufFileWriteCommon(BufFileCommon *file, void *ptr, size_t size,
 			{
 				Assert(!is_transient);
 
-				/* Hmm, went directly from reading to writing? */
+				/*
+				 * Hmm, went directly from reading to writing?
+				 *
+				 * As pos should be exactly BLCKSZ, there is nothing special
+				 * to do about data_encrypted, except for zeroing the buffer.
+				 */
+				Assert(file->pos == BLCKSZ);
+
 				file->curOffset += file->pos;
 				file->pos = 0;
 				file->nbytes = 0;
+
+				/* See makeBufFile() */
+				if (data_encrypted)
+					MemSet(file->buffer.data, 0, BLCKSZ);
 			}
+
+			/*
+			 * If curOffset changed above, it should still meet the assumption
+			 * that buffer is the I/O unit for encrypted data.
+			 */
+			Assert(file->curOffset % BLCKSZ == 0 || !data_encrypted);
 		}
 
 		nthistime = BLCKSZ - file->pos;
@@ -1115,6 +1992,82 @@ BufFileWriteCommon(BufFileCommon *file, void *ptr, size_t size,
 		file->pos += nthistime;
 		if (file->nbytes < file->pos)
 			file->nbytes = file->pos;
+
+		if (data_encrypted)
+		{
+			off_t		new_offset;
+			int			fileno;
+
+			if (!is_transient)
+			{
+				fileno = file->curFile;
+
+				/*
+				 * curFile does not necessarily correspond to the offset: it
+				 * can still have the initial value if BufFileSeek() skipped
+				 * the previous file w/o dumping anything of it. While curFile
+				 * will be fixed during the next dump, we need valid fileno
+				 * now.
+				 */
+				if (file->curOffset >= MAX_PHYSICAL_FILESIZE)
+				{
+					/*
+					 * Even BufFileSeek() should not allow curOffset to become
+					 * more than MAX_PHYSICAL_FILESIZE (if caller passes
+					 * higher offset, curFile gets increased instead).
+					 */
+					Assert(file->curOffset == MAX_PHYSICAL_FILESIZE);
+
+					fileno++;
+				}
+
+				/*
+				 * fileno can now point to a segment that does not exist on
+				 * disk yet.
+				 */
+				ensureUsefulArraySize(file, fileno + 1);
+
+				/*
+				 * Update the "useful offset" of the underlying component file
+				 * if we've added any useful data.
+				 */
+				new_offset = file->curOffset + file->pos;
+
+				/*
+				 * Make sure the offset is relative to the correct component
+				 * file.  It should have been adjusted during sequential
+				 * write, but if the buffer hasn't been dumped since
+				 * BufFileSeek() had set curOffset to the end of the last
+				 * segment, new_offset is now relative to the start of that
+				 * *previously last* segment. Make sure it's relative to the
+				 * new last (fileno) segment.
+				 */
+				if (file->curOffset % MAX_PHYSICAL_FILESIZE == 0)
+					new_offset %= MAX_PHYSICAL_FILESIZE;
+
+				/*
+				 * Adjust the number of useful bytes in the file if needed.
+				 * This has to happen immediately, independent from
+				 * BufFileDumpBuffer(), so that BufFileRead() works correctly
+				 * anytime.
+				 */
+				if (new_offset > file->useful[fileno])
+					file->useful[fileno] = new_offset;
+			}
+			else
+			{
+				/*
+				 * Transient file is a single file on the disk, so the whole
+				 * thing is much simpler.
+				 */
+				fileno = 0;
+
+				new_offset = file->curOffset + file->pos;
+				if (new_offset > file->useful[fileno])
+					file->useful[fileno] = new_offset;
+			}
+		}
+
 		ptr = (void *) ((char *) ptr + nthistime);
 		size -= nthistime;
 		nwritten += nthistime;
diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c
index 30f6200a86..b98e9c10d8 100644
--- a/src/backend/storage/file/copydir.c
+++ b/src/backend/storage/file/copydir.c
@@ -23,24 +23,29 @@
 #include <sys/stat.h>
 
 #include "storage/copydir.h"
+#include "storage/encryption.h"
 #include "storage/fd.h"
+#include "storage/reinit.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 
 /*
  * copydir: copy a directory
  *
- * If recurse is false, subdirectories are ignored.  Anything that's not
- * a directory or a regular file is ignored.
+ * RelFileNode values must specify tablespace and database oids for source
+ * and target to support re-encryption if necessary. relNode value in provided
+ * structs will be clobbered.
  */
 void
-copydir(char *fromdir, char *todir, bool recurse)
+copydir(char *fromdir, char *todir, RelFileNode *fromNode, RelFileNode *toNode)
 {
 	DIR		   *xldir;
 	struct dirent *xlde;
 	char		fromfile[MAXPGPATH * 2];
 	char		tofile[MAXPGPATH * 2];
 
+	Assert(!data_encrypted || (fromNode != NULL && toNode != NULL));
+
 	if (MakePGDirectory(todir) != 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
@@ -67,14 +72,32 @@ copydir(char *fromdir, char *todir, bool recurse)
 					(errcode_for_file_access(),
 					 errmsg("could not stat file \"%s\": %m", fromfile)));
 
-		if (S_ISDIR(fst.st_mode))
+		if (S_ISREG(fst.st_mode))
 		{
-			/* recurse to handle subdirectories */
-			if (recurse)
-				copydir(fromfile, tofile, true);
+			int			oidchars;
+			ForkNumber	forkNum;
+			int			segment;
+
+			/*
+			 * For encrypted databases we need to reencrypt files with new
+			 * tweaks.
+			 */
+			if (data_encrypted &&
+				parse_filename_for_nontemp_relation(xlde->d_name,
+													&oidchars, &forkNum, &segment))
+			{
+				char		oidbuf[OIDCHARS + 1];
+
+				memcpy(oidbuf, xlde->d_name, oidchars);
+				oidbuf[oidchars] = '\0';
+
+				/* We scribble over the provided RelFileNodes here */
+				fromNode->relNode = toNode->relNode = atol(oidbuf);
+				copy_file(fromfile, tofile, fromNode, toNode, forkNum, forkNum, segment);
+			}
+			else
+				copy_file(fromfile, tofile, NULL, NULL, 0, 0, 0);
 		}
-		else if (S_ISREG(fst.st_mode))
-			copy_file(fromfile, tofile);
 	}
 	FreeDir(xldir);
 
@@ -121,17 +144,22 @@ copydir(char *fromdir, char *todir, bool recurse)
 }
 
 /*
- * copy one file
+ * copy one file. If decryption and reencryption may be needed specify
+ * relfilenodes for source and target.
  */
 void
-copy_file(char *fromfile, char *tofile)
+copy_file(char *fromfile, char *tofile, RelFileNode *fromNode,
+		  RelFileNode *toNode, ForkNumber fromForkNum, ForkNumber toForkNum,
+		  int segment)
 {
 	char	   *buffer;
 	int			srcfd;
 	int			dstfd;
 	int			nbytes;
+	int			bytesread;
 	off_t		offset;
 	off_t		flush_offset;
+	BlockNumber blockNum = segment * RELSEG_SIZE;
 
 	/* Size of copy buffer (read and write requests) */
 #define COPY_BUF_SIZE (8 * BLCKSZ)
@@ -186,15 +214,50 @@ copy_file(char *fromfile, char *tofile)
 			flush_offset = offset;
 		}
 
-		pgstat_report_wait_start(WAIT_EVENT_COPY_FILE_READ);
-		nbytes = read(srcfd, buffer, COPY_BUF_SIZE);
-		pgstat_report_wait_end();
-		if (nbytes < 0)
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", fromfile)));
+		/*
+		 * Try to read as much as we fit in the buffer so we can deal with
+		 * complete blocks if we need to reencrypt.
+		 */
+		nbytes = 0;
+		while (nbytes < COPY_BUF_SIZE)
+		{
+			pgstat_report_wait_start(WAIT_EVENT_COPY_FILE_READ);
+			bytesread = read(srcfd, buffer + nbytes, COPY_BUF_SIZE - nbytes);
+			pgstat_report_wait_end();
+			if (bytesread < 0)
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not read file \"%s\": %m", fromfile)));
+			nbytes += bytesread;
+			if (bytesread == 0)
+				break;
+		}
 		if (nbytes == 0)
 			break;
+
+		/*
+		 * If the database is encrypted we need to decrypt the data here and
+		 * reencrypt it to adjust the tweak values of blocks.
+		 */
+		if (data_encrypted)
+		{
+			if (fromNode != NULL)
+			{
+				Assert(toNode != NULL);
+
+				/*
+				 * There's no reason not to have whole number of pages read.
+				 * Computation of the number of blocks below relies on this
+				 * fact.
+				 */
+				Assert(nbytes % BLCKSZ == 0);
+
+				blockNum = ReencryptBlock(buffer, nbytes / BLCKSZ,
+										  fromNode, toNode, fromForkNum,
+										  toForkNum, blockNum);
+			}
+		}
+
 		errno = 0;
 		pgstat_report_wait_start(WAIT_EVENT_COPY_FILE_WRITE);
 		if ((int) write(dstfd, buffer, nbytes) != nbytes)
diff --git a/src/backend/storage/file/encryption.c b/src/backend/storage/file/encryption.c
new file mode 100644
index 0000000000..d776c2667f
--- /dev/null
+++ b/src/backend/storage/file/encryption.c
@@ -0,0 +1,874 @@
+/*-------------------------------------------------------------------------
+ *
+ * encryption.c
+ *	  This code handles encryption and decryption of data.
+ *
+ * Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * See src/backend/storage/file/README.encryption for explanation of the
+ * design.
+ *
+ * IDENTIFICATION
+ *	  src/backend/storage/file/encryption.c
+ *
+ * NOTES
+ *		This file is compiled as both front-end and backend code, so the
+ *		FRONTEND macro must be used to distinguish the case if we need to
+ *		report error or if server-defined variable / function seems useful.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <unistd.h>
+
+#include "common/fe_memutils.h"
+#include "common/sha2.h"
+#include "common/string.h"
+#include "catalog/pg_control.h"
+#include "storage/encryption.h"
+
+#ifndef FRONTEND
+#include "pgstat.h"
+#include "storage/fd.h"
+#include "utils/memutils.h"
+#include "miscadmin.h"
+#include "port.h"
+#endif							/* FRONTEND */
+
+#ifdef USE_ENCRYPTION
+#include <openssl/conf.h>
+#include <openssl/evp.h>
+#include <openssl/err.h>
+
+EVP_CIPHER_CTX *ctx_encrypt,
+		   *ctx_decrypt,
+		   *ctx_encrypt_stream,
+		   *ctx_decrypt_stream;
+#endif							/* USE_ENCRYPTION */
+
+const char *encryption_key_prefix = "encryption_key=";
+const char *encryption_pwd_prefix = "encryption_password=";
+
+/* Key to encrypt / decrypt data. */
+unsigned char encryption_key[ENCRYPTION_KEY_LENGTH];
+
+bool		data_encrypted = false;
+
+char	   *encryption_key_command = NULL;
+
+PGAlignedBlock encrypt_buf;
+char	   *encrypt_buf_xlog = NULL;
+
+static void setup_encryption_internal(void);
+
+#ifdef USE_ENCRYPTION
+static void init_encryption_context(EVP_CIPHER_CTX **ctx_p, bool stream);
+static void evp_error(void);
+#endif							/* USE_ENCRYPTION */
+
+/*
+ * Pointer to the KDF parameters.
+ *
+ * XXX Rename this and the write / read functions so they contain the
+ * 'keysetup' string?
+ */
+KDFParamsData *KDFParams = NULL;
+
+/*
+ * Initialize encryption subsystem for use. Must be called before any
+ * encryptable data is read from or written to data directory.
+ *
+ * If the key command returns a password, "bootstrap" selects between writing
+ * a new KDF file (backend only) and reading the existing one. data_dir should
+ * be passed if the current directory is different; it's needed to locate the
+ * KDF file.
+ */
+void
+setup_encryption(bool bootstrap, char *data_dir)
+{
+	char	   *credentials;
+	bool		is_key;
+	size_t		len;
+
+	credentials = run_encryption_key_command(encryption_key_command,
+											 &is_key,
+											 &len);
+
+	/* Setup KDF if we need to derive the key from a password. */
+	if (!is_key)
+	{
+		if (bootstrap)
+		{
+#ifndef FRONTEND
+			write_kdf_file();
+#endif							/* FRONTEND */
+		}
+		else
+			read_kdf_file(data_dir);
+	}
+
+	setup_encryption_key(credentials, is_key, len);
+	pfree(credentials);
+	setup_encryption_internal();
+}
+
+/*
+ * Initialize KDFParamsData and write it to a newly created KDF_PARAMS_FILE,
+ * reporting any failure as PANIC.
+ * This is very similar to WriteControlFile().
+ */
+#ifndef FRONTEND
+void
+write_kdf_file(void)
+{
+	KDFParamsPBKDF2 *params;
+	int			fd;
+
+	StaticAssertStmt(sizeof(KDFParamsData) <= KDF_PARAMS_FILE_SIZE,
+					 "kdf file is too large for atomic disk writes");
+
+	/* The initialization should not be repeated. */
+	Assert(KDFParams == NULL);
+
+	KDFParams = MemoryContextAllocZero(TopMemoryContext,
+									   KDF_PARAMS_FILE_SIZE);
+	KDFParams->function = KDF_OPENSSL_PKCS5_PBKDF2_HMAC_SHA;
+	params = &KDFParams->data.pbkdf2;
+	params->niter = ENCRYPTION_KDF_NITER;
+	/* random() is predictable, so take the salt from the strong generator. */
+	if (!pg_strong_random(params->salt, ENCRYPTION_KDF_SALT_LEN))
+		ereport(PANIC,
+				(errmsg("could not generate random salt for key setup file")));
+
+	/* Contents are protected with a CRC */
+	INIT_CRC32C(KDFParams->crc);
+	COMP_CRC32C(KDFParams->crc,
+				(char *) KDFParams,
+				offsetof(KDFParamsData, crc));
+	FIN_CRC32C(KDFParams->crc);
+
+	fd = BasicOpenFile(KDF_PARAMS_FILE,
+					   O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
+	if (fd < 0)
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not create key setup file \"%s\": %m",
+						KDF_PARAMS_FILE)));
+
+	errno = 0;
+	pgstat_report_wait_start(WAIT_EVENT_KDF_FILE_WRITE);
+	if (write(fd, KDFParams, KDF_PARAMS_FILE_SIZE) != KDF_PARAMS_FILE_SIZE)
+	{
+		/* if write didn't set errno, assume problem is no disk space */
+		if (errno == 0)
+			errno = ENOSPC;
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not write to key setup file: %m")));
+	}
+	pgstat_report_wait_end();
+
+	pgstat_report_wait_start(WAIT_EVENT_KDF_FILE_SYNC);
+	if (pg_fsync(fd) != 0)
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not fsync key setup file: %m")));
+	pgstat_report_wait_end();
+
+	if (close(fd))
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not close key setup file: %m")));
+}
+#endif							/* FRONTEND */
+
+/*
+ * Read KDFParamsData from file and store it in local memory. Any failure is
+ * reported via encryption_error() as fatal.
+ *
+ * If dir is NULL, assume we're in the data directory. postmaster should call
+ * the function early enough for any other process to inherit valid pointer
+ * to the data.
+ */
+void
+read_kdf_file(char *dir)
+{
+	pg_crc32c	crc;
+	int			fd;
+	char		path[MAXPGPATH];
+	int			written = 0;
+	int			remains;
+
+	if (dir)
+	{
+		written = snprintf(path, MAXPGPATH, "%s/", dir);
+		if (written >= MAXPGPATH)
+			encryption_error(true, "KDF directory too long");
+	}
+
+	remains = MAXPGPATH - written;
+	if (remains < (strlen(KDF_PARAMS_FILE) + 1))
+	{
+		/*
+		 * KDF_PARAMS_FILE should always fit in MAXPGPATH, so directory length
+		 * must be the problem.
+		 */
+		encryption_error(true, "KDF directory too long");
+	}
+
+	snprintf(path + written, remains, "%s", KDF_PARAMS_FILE);
+
+#ifndef FRONTEND
+	KDFParams = MemoryContextAllocZero(TopMemoryContext,
+									   KDF_PARAMS_FILE_SIZE);
+	fd = BasicOpenFile(path, O_RDONLY | PG_BINARY);
+#else
+	KDFParams = palloc(KDF_PARAMS_FILE_SIZE);
+	fd = open(path, O_RDONLY | PG_BINARY, S_IRUSR);
+#endif
+
+	if (fd < 0)
+		encryption_error(true, "could not open key setup file");
+
+#ifndef FRONTEND
+	pgstat_report_wait_start(WAIT_EVENT_KDF_FILE_READ);
+#endif
+
+	if (read(fd, KDFParams, sizeof(KDFParamsData)) != sizeof(KDFParamsData))
+		encryption_error(true, "could not read from key setup file");
+
+#ifndef FRONTEND
+	pgstat_report_wait_end();
+#endif
+
+	close(fd);
+
+	/* Now check the CRC. */
+	INIT_CRC32C(crc);
+	COMP_CRC32C(crc,
+				(char *) KDFParams,
+				offsetof(KDFParamsData, crc));
+	FIN_CRC32C(crc);
+
+	if (!EQ_CRC32C(crc, KDFParams->crc))
+		encryption_error(true, "incorrect checksum in key setup file");
+
+	/* PBKDF2 is the only key derivation function accepted here. */
+	if (KDFParams->function != KDF_OPENSSL_PKCS5_PBKDF2_HMAC_SHA)
+		encryption_error(true, "unsupported KDF function");
+}
+
+/*
+ * Encrypt the fixed string "postgresqlcrypt" into *buf so that the result can
+ * be used to verify that the encryption key is correct. The caller's buf must
+ * be able to hold at least ENCRYPTION_SAMPLE_SIZE bytes.
+ */
+void
+sample_encryption(char *buf)
+{
+	char		tweak[TWEAK_SIZE];
+	int			pos;
+
+	/* The tweak is simply the byte sequence 0, 1, 2, ... */
+	for (pos = TWEAK_SIZE - 1; pos >= 0; pos--)
+		tweak[pos] = pos;
+	encrypt_block("postgresqlcrypt", buf, ENCRYPTION_SAMPLE_SIZE, tweak,
+				  false);
+}
+
+/*
+ * Encrypts one block of data with a specified tweak value. May only be called
+ * when data_encrypted is true.
+ *
+ * Input and output buffer may point to the same location.
+ *
+ * Unless "stream", "size" must be a (non-zero) multiple of ENCRYPTION_BLOCK.
+ *
+ * "tweak" value must be TWEAK_SIZE bytes long.
+ *
+ * If "stream" is set, stream cipher is used instead of block one.
+ *
+ * All-zero blocks are not encrypted to correctly handle relation extension,
+ * and also to simplify handling of holes created by seek past EOF and
+ * consequent write (see buffile.c).
+ */
+void
+encrypt_block(const char *input, char *output, Size size, char *tweak,
+			  bool stream)
+{
+#ifdef USE_ENCRYPTION
+	int			out_size;
+	EVP_CIPHER_CTX *ctx;
+
+	Assert(data_encrypted);
+
+	/*
+	 * Block cipher should only be used if the size is whole multiple of
+	 * encryption block size.
+	 */
+	Assert((size >= ENCRYPTION_BLOCK && size % ENCRYPTION_BLOCK == 0) ||
+		   stream);
+
+	/*
+	 * Empty page is not worth encryption. Do not waste cycles checking for
+	 * stream cipher as this is currently used only for XLOG pages, and empty
+	 * XLOG page should not be written to disk.
+	 */
+	if (!stream && IsAllZero(input, size))
+	{
+		memset(output, 0, size);
+		return;
+	}
+
+	ctx = !stream ? ctx_encrypt : ctx_encrypt_stream;
+
+	/* Supply key and tweak (IV); the cipher itself is already set in ctx. */
+	if (EVP_EncryptInit_ex(ctx, NULL, NULL, encryption_key,
+						   (unsigned char *) tweak) != 1)
+		evp_error();
+
+	/* Do the actual encryption. */
+	if (EVP_EncryptUpdate(ctx, (unsigned char *) output,
+						  &out_size, (unsigned char *) input, size) != 1)
+		evp_error();
+
+	Assert(out_size == size);
+#else
+	/* data_encrypted should not be set */
+	Assert(false);
+#endif							/* USE_ENCRYPTION */
+}
+
+/*
+ * Decrypts one block of data with a specified tweak value. May only be called
+ * when data_encrypted is true.
+ *
+ * Input and output buffer may point to the same location.
+ *
+ * For detailed comments see encrypt_block().
+ */
+void
+decrypt_block(const char *input, char *output, Size size, char *tweak,
+			  bool stream)
+{
+#ifdef USE_ENCRYPTION
+	int			out_size;
+	EVP_CIPHER_CTX *ctx;
+
+	Assert(data_encrypted);
+	Assert((size >= ENCRYPTION_BLOCK && size % ENCRYPTION_BLOCK == 0) ||
+		   stream);
+
+	if (!stream && IsAllZero(input, size))
+	{
+		memset(output, 0, size);
+		return;
+	}
+
+	ctx = !stream ? ctx_decrypt : ctx_decrypt_stream;
+
+	/* Supply key and tweak (IV); the cipher itself is already set in ctx. */
+	if (EVP_DecryptInit_ex(ctx, NULL, NULL, encryption_key,
+						   (unsigned char *) tweak) != 1)
+		evp_error();
+
+	/* Do the actual decryption. */
+	if (EVP_DecryptUpdate(ctx, (unsigned char *) output,
+						  &out_size, (unsigned char *) input, size) != 1)
+		evp_error();
+
+	Assert(out_size == size);
+#else
+	/* data_encrypted should not be set */
+	Assert(false);
+#endif							/* USE_ENCRYPTION */
+}
+
+/*
+ * Report "message" so that caller does not have to care whether it executes
+ * in backend (PANIC if "fatal", else INFO) or front-end (stderr, plus exit).
+ */
+void
+encryption_error(bool fatal, char *message)
+{
+#ifndef FRONTEND
+	/*
+	 * Fatal is actually PANIC so that the encryption code never uses lower
+	 * elevel than the code around, e.g. that in ReadControlFile().
+	 */
+	elog(fatal ? PANIC : INFO, "%s", message);
+#else
+	fprintf(stderr, "%s\n", message);
+	if (fatal)
+		exit(EXIT_FAILURE);
+#endif
+}
+
+/*
+ * If credentials is a key ("len" must be ENCRYPTION_KEY_LENGTH), copy it to
+ * encryption_key. If it's a password of "len" bytes, derive the key from it.
+ */
+void
+setup_encryption_key(char *credentials, bool is_key, size_t len)
+{
+#ifdef USE_ENCRYPTION
+	Assert(credentials != NULL);
+
+	if (is_key)
+	{
+		Assert(len == ENCRYPTION_KEY_LENGTH);
+		memcpy(encryption_key, credentials, len);
+	}
+	else
+	{
+		KDFParamsPBKDF2 *params;
+		int			rc;
+
+		/*
+		 * We were given a password, so we need the KDF parameters to turn it
+		 * into the key.
+		 */
+		if (KDFParams == NULL)
+		{
+#ifndef FRONTEND
+			ereport(FATAL,
+					(errmsg("this instance does not accept encryption password"),
+					 errdetail("Encryption key was probably used to initialize the instance.")));
+#else
+			encryption_error(true,
+							 "this instance does not accept encryption password.\n"
+							 "Encryption key was probably used to initialize the instance.\n");
+#endif							/* FRONTEND */
+		}
+
+		/*
+		 * Turn the password into the encryption key.
+		 */
+		params = &KDFParams->data.pbkdf2;
+		rc = PKCS5_PBKDF2_HMAC(credentials,
+							   len,
+							   params->salt,
+							   ENCRYPTION_KDF_SALT_LEN,
+							   params->niter,
+							   EVP_sha1(),
+							   ENCRYPTION_KEY_LENGTH,
+							   encryption_key);
+
+		if (rc != 1)
+		{
+#ifndef FRONTEND
+			ereport(FATAL,
+					(errmsg("failed to derive key from password")));
+#else
+			encryption_error(true, "failed to derive key from password");
+#endif							/* FRONTEND */
+		}
+	}
+#else
+	/*
+	 * If no encryption implementation is linked and caller requests
+	 * encryption, we should error out here and thus cause the calling process
+	 * to fail (preferably postmaster, so the child processes don't make the
+	 * same mistake).
+	 *
+	 * We could actually #ifdef only the call of the KDF above, but that would
+	 * be inconsistent, and the following ERROR would have to be raised
+	 * elsewhere in addition.
+	 */
+	ENCRYPTION_NOT_SUPPORTED_MSG;
+#endif							/* USE_ENCRYPTION */
+}
+
+static void
+setup_encryption_internal(void)
+{
+#ifdef USE_ENCRYPTION
+	/*
+	 * Setup OpenSSL.
+	 *
+	 * None of these functions should return a value or raise error.
+	 */
+	ERR_load_crypto_strings();
+	OpenSSL_add_all_algorithms();
+
+	/*
+	 * TODO Find out if this needs to be called for OpenSSL < 1.1.0.
+	 */
+	/* OPENSSL_config(NULL); */
+
+	init_encryption_context(&ctx_encrypt, false);
+	init_encryption_context(&ctx_decrypt, false);
+
+	init_encryption_context(&ctx_encrypt_stream, true);
+	init_encryption_context(&ctx_decrypt_stream, true);
+
+	/*
+	 * encrypt_buf_xlog spans multiple pages, so unlike encrypt_buf it is
+	 * allocated dynamically. That also ensures it'll be MAXALIGNed, which is
+	 * useful because the buffer will be used for I/O.
+	 *
+	 * Use TopMemoryContext because on server side this code is run by
+	 * postmaster and postmaster context gets freed after fork().
+	 */
+
+	/*
+	 * In front-end code (FRONTEND defined) the buffer is obtained with plain
+	 * palloc() instead.
+	 */
+#ifndef FRONTEND
+	encrypt_buf_xlog = (char *) MemoryContextAlloc(TopMemoryContext,
+												   ENCRYPT_BUF_XLOG_SIZE);
+#else
+	encrypt_buf_xlog = (char *) palloc(ENCRYPT_BUF_XLOG_SIZE);
+#endif
+
+#else
+	/* setup_encryption_key() should have been called (and ERROR) by now. */
+	Assert(false);
+#endif							/* USE_ENCRYPTION */
+}
+
+#ifdef USE_ENCRYPTION
+/*
+ * Initialize the OpenSSL context for passed cipher.
+ *
+ * On server side this happens during postmaster startup, so other processes
+ * inherit the initialized context via fork(). There's no reason to do this
+ * again and again in encrypt_block() / decrypt_block(), also because we cannot
+ * handle out-of-memory conditions encountered by OpenSSL in another way than
+ * ereport(FATAL). The OOM is much less likely to happen during postmaster
+ * startup, and even if it happens, troubleshooting should be easier than if
+ * it happened during normal operation.
+ *
+ * XXX Do we need to call EVP_CIPHER_CTX_cleanup() (via on_proc_exit callback
+ * for server processes and other way for front-ends)? Not sure it's
+ * necessary, as the initialization does not involve any shared resources
+ * (e.g. files).
+ */
+static void
+init_encryption_context(EVP_CIPHER_CTX **ctx_p, bool stream)
+{
+	EVP_CIPHER_CTX *ctx;
+	const EVP_CIPHER *cipher;
+#ifdef USE_ASSERT_CHECKING
+	int			block_size;
+#endif							/* USE_ASSERT_CHECKING */
+
+	cipher = !stream ? EVP_aes_256_cbc() : EVP_aes_256_ctr();
+
+	if ((*ctx_p = EVP_CIPHER_CTX_new()) == NULL)
+		evp_error();
+	ctx = *ctx_p;
+	if (EVP_EncryptInit_ex(ctx, cipher, NULL, NULL, NULL) != 1)
+		evp_error();
+
+	/*
+	 * No padding is needed. For a block cipher, the input block size should
+	 * already be a multiple of ENCRYPTION_BLOCK. For stream cipher, we don't
+	 * need padding anyway. This might save some cycles at the OpenSSL end.
+	 * XXX Is this setting worthwhile when we don't call EVP_DecryptFinal_ex()
+	 * anyway?
+	 */
+	EVP_CIPHER_CTX_set_padding(ctx, 0);
+
+	Assert(EVP_CIPHER_CTX_iv_length(ctx) == TWEAK_SIZE);
+	Assert(EVP_CIPHER_CTX_key_length(ctx) == ENCRYPTION_KEY_LENGTH);
+#ifdef USE_ASSERT_CHECKING
+	block_size = EVP_CIPHER_CTX_block_size(ctx);
+	if (!stream)
+		Assert(block_size == ENCRYPTION_BLOCK);
+	else
+		Assert(block_size == 1);
+#endif							/* USE_ASSERT_CHECKING */
+}
+
+#endif							/* USE_ENCRYPTION */
+
+/*
+ * Run "cmd" (normally encryption_key_command) and return the key or password
+ * it prints, as a palloc'd buffer that is not NUL-terminated.
+ *
+ * *is_key_p receives true if the command returns key (already converted from
+ * hexadecimal to binary), false if it's password. *len_p receives its length.
+ */
+char *
+run_encryption_key_command(const char *cmd, bool *is_key_p, size_t *len_p)
+{
+	FILE	   *fp;
+	char	   *buf,
+			   *result;
+	bool		is_key = false;
+	size_t		key_pref_len,
+				pwd_pref_len,
+				key_chars,
+				read_len,
+				bytes_read;
+	size_t		buf_size,
+				result_size;
+
+	if (cmd == NULL || !strlen(cmd))
+	{
+		/*
+		 * encryption_key_command should have been set by initdb. It's weird
+		 * if it was not, but there's no better recommendation we can give the
+		 * user.
+		 *
+		 * TODO Move this check outside the function since the function does
+		 * not care whether the "cmd" argument comes from
+		 * "encryption_key_command".
+		 */
+#ifndef FRONTEND
+		ereport(FATAL,
+				(errmsg("encryption key not provided"),
+				 errdetail("The database cluster was initialized with encryption"
+						   " but the application was started without an encryption key."),
+				 errhint("Set the encryption_key_command configuration variable.")));
+#else							/* FRONTEND */
+		encryption_error(true,
+						 "The database cluster was initialized with encryption"
+						 " but the server was started without an encryption key. "
+						 "Set the encryption_key_command configuration variable.\n");
+#endif							/* FRONTEND */
+	}
+
+	encryption_error(false,
+					 psprintf("Executing \"%s\" to set up encryption key",
+							  cmd));
+
+	fp = popen(cmd, "r");
+	if (fp == NULL)
+		encryption_error(true,
+						 psprintf("Failed to execute \"%s\"", cmd));
+
+	/*
+	 * Check which prefix the file starts with.
+	 *
+	 * The prefixes probably won't change after the release but they might
+	 * change during development. The reading logic should be generic so that
+	 * change of prefix length requires no additional coding.
+	 */
+	key_pref_len = strlen(encryption_key_prefix);
+	pwd_pref_len = strlen(encryption_pwd_prefix);
+
+	/* The buffer must accommodate either prefix. */
+	buf_size = Max(key_pref_len, pwd_pref_len);
+	buf = (char *) palloc(buf_size);
+
+	/*
+	 * Read as few bytes as necessary so that we don't have to move back in
+	 * the buffer if the first comparison does not match.
+	 */
+	read_len = Min(key_pref_len, pwd_pref_len);
+
+	if (fread(buf, 1, read_len, fp) != read_len)
+		encryption_error(true, "Not enough data received from encryption key command");
+
+	if (read_len == key_pref_len &&
+		strncmp(buf, encryption_key_prefix, key_pref_len) == 0)
+		is_key = true;
+	else if (read_len == pwd_pref_len &&
+			 strncmp(buf, encryption_pwd_prefix, pwd_pref_len) == 0)
+		is_key = false;
+	else if (buf_size > read_len)
+	{
+		size_t		len_diff;
+
+		/* Read enough data so that one of the prefixes must match. */
+		len_diff = buf_size - read_len;
+		if (fread(buf + read_len, 1, len_diff, fp) != len_diff)
+			encryption_error(true,
+							 "Not enough data received from encryption key command");
+		read_len += len_diff;
+
+		/* Try to match the prefixes again. */
+		if (read_len == key_pref_len &&
+			strncmp(buf, encryption_key_prefix, key_pref_len) == 0)
+			is_key = true;
+		else if (read_len == pwd_pref_len &&
+				 strncmp(buf, encryption_pwd_prefix, pwd_pref_len) == 0)
+			is_key = false;
+		else
+			encryption_error(true,
+							 "Unknown data received from encryption key command");
+	}
+	else
+		encryption_error(true,
+						 "Unknown data received from encryption key command");
+
+	*is_key_p = is_key;
+
+	/* Key length in characters (two hexadecimal digits per byte) */
+	key_chars = ENCRYPTION_KEY_LENGTH * 2;
+
+	/* Read the actual credentials. */
+	read_len = is_key ? key_chars : ENCRYPTION_PWD_MAX_LENGTH;
+
+	/*
+	 * Accept one extra character so that we can detect key / password too
+	 * long.
+	 */
+	read_len++;
+
+	if (read_len > buf_size)
+	{
+		buf = (char *) repalloc(buf, read_len);
+		buf_size = read_len;
+	}
+
+	bytes_read = fread(buf, 1, read_len, fp);
+	if (bytes_read == 0)
+		encryption_error(true,
+						 "Not enough data provided by encryption key command");
+
+	/*
+	 * Since read_len accommodates one extra byte, equality means that too
+	 * much data was received, unless the extra byte is line delimiter.
+	 */
+	if (bytes_read == read_len && buf[bytes_read - 1] != '\n')
+		encryption_error(true, "Too much data returned by encryption key command");
+
+	/* The line delimiter is not valid data. */
+	if (buf[bytes_read - 1] == '\n')
+		bytes_read--;
+
+	if ((is_key && bytes_read < key_chars) ||
+		(!is_key && bytes_read < ENCRYPTION_PWD_MIN_LENGTH))
+	{
+		if (feof(fp))
+			encryption_error(true,
+							 "Not enough data provided by encryption key command");
+		else
+			encryption_error(true,
+							 psprintf("encryption key command returned error code %d",
+									  ferror(fp)));
+	}
+
+	/*
+	 * For a key the result size is different from the amount of data read.
+	 */
+	result_size = is_key ? ENCRYPTION_KEY_LENGTH : ENCRYPTION_PWD_MAX_LENGTH;
+
+	result = (char *) palloc(result_size);
+
+	if (is_key)
+	{
+		int			i;
+
+		for (i = 0; i < ENCRYPTION_KEY_LENGTH; i++)
+		{
+			char		hex[3];
+
+			/* "buf" is not NUL-terminated, so parse a private copy. */
+			hex[0] = buf[2 * i];
+			hex[1] = buf[2 * i + 1];
+			hex[2] = '\0';
+			if (strspn(hex, "0123456789abcdefABCDEF") != 2)
+				encryption_error(true,
+								 psprintf("Invalid character in encryption key at position %d",
+										  2 * i));
+			result[i] = (char) strtol(hex, NULL, 16);
+		}
+		*len_p = ENCRYPTION_KEY_LENGTH;
+	}
+	else
+	{
+		*len_p = bytes_read;
+		memcpy(result, buf, *len_p);
+	}
+
+	/* No extra data is allowed. */
+	if (fread(buf, 1, 1, fp) > 0)
+		encryption_error(true,
+						 "Credentials are followed by useless data");
+
+
+	pfree(buf);
+	pclose(fp);
+
+	return result;
+}
+
+#ifdef USE_ENCRYPTION
+/*
+ * Print the OpenSSL error queue to stderr and terminate; does not return.
+ */
+static void
+evp_error(void)
+{
+	ERR_print_errors_fp(stderr);
+#ifndef FRONTEND
+
+	/*
+	 * FATAL is the appropriate level because backend can hardly fix anything
+	 * if encryption / decryption has failed.
+	 *
+	 * XXX Do we yet need EVP_CIPHER_CTX_cleanup() here?
+	 */
+	elog(FATAL, "OpenSSL encountered error during encryption or decryption.");
+#else
+	fprintf(stderr,
+			"OpenSSL encountered error during encryption or decryption.\n");
+	exit(EXIT_FAILURE);
+#endif							/* FRONTEND */
+}
+#endif							/* USE_ENCRYPTION */
+
+/*
+ * Build the tweak for one XLOG page (XLOG is encrypted a page at a time):
+ * timeline, segment and offset are stored back to back, the rest is zeroed.
+ * The function lives here rather than in one of the xlog*.c modules so that
+ * front-end applications can easily use it too.
+ */
+void
+XLogEncryptionTweak(char *tweak, TimeLineID timeline, XLogSegNo segment,
+					uint32 offset)
+{
+	char	   *seg_pos = tweak + sizeof(timeline);
+	char	   *off_pos = seg_pos + sizeof(segment);
+
+	memset(tweak, 0, TWEAK_SIZE);
+	memcpy(tweak, &timeline, sizeof(timeline));
+	memcpy(seg_pos, &segment, sizeof(segment));
+	memcpy(off_pos, &offset, sizeof(offset));
+}
+
+/*
+ * Blocks of a copied relation need new tweaks: re-encrypt "blocks" blocks of
+ * "buffer" in place, starting at blockNum. Returns the next block number.
+ */
+BlockNumber
+ReencryptBlock(char *buffer, int blocks,
+			   RelFileNode *srcNode, RelFileNode *dstNode,
+			   ForkNumber srcForkNum, ForkNumber dstForkNum,
+			   BlockNumber blockNum)
+{
+	int			i;
+	char		oldTweak[TWEAK_SIZE];
+	char		newTweak[TWEAK_SIZE];
+
+	for (i = 0; i < blocks; i++, blockNum++)
+	{
+		char	   *page = buffer + i * BLCKSZ;
+
+		mdtweak(oldTweak, srcNode, srcForkNum, blockNum);
+		mdtweak(newTweak, dstNode, dstForkNum, blockNum);
+		decrypt_block(page, page, BLCKSZ, oldTweak, false);
+		encrypt_block(page, page, BLCKSZ, newTweak, false);
+	}
+	return blockNum;
+}
+
+/*
+ * md files are encrypted block at a time. The tweak is the RelFileNode plus
+ * 4 bytes of (forknum << 24) ^ blocknum, so forks alias in huge tables.
+ */
+void
+mdtweak(char *tweak, RelFileNode *relnode, ForkNumber forknum, BlockNumber blocknum)
+{
+	uint32		fork_and_block = (forknum << 24) ^ blocknum;
+
+	memcpy(tweak, relnode, sizeof(RelFileNode));
+	memcpy(tweak + sizeof(RelFileNode), &fork_and_block, 4);
+}
diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c
index 1ead778cc9..6af20cfe64 100644
--- a/src/backend/storage/file/reinit.c
+++ b/src/backend/storage/file/reinit.c
@@ -16,6 +16,7 @@
 
 #include <unistd.h>
 
+#include "catalog/pg_tablespace.h"
 #include "common/relpath.h"
 #include "storage/copydir.h"
 #include "storage/fd.h"
@@ -24,9 +25,9 @@
 #include "utils/memutils.h"
 
 static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname,
-									  int op);
+									  int op, Oid spcOid);
 static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname,
-								   int op);
+								   int op, Oid spcOid, Oid dbOid);
 
 typedef struct
 {
@@ -68,7 +69,7 @@ ResetUnloggedRelations(int op)
 	/*
 	 * First process unlogged files in pg_default ($PGDATA/base)
 	 */
-	ResetUnloggedRelationsInTablespaceDir("base", op);
+	ResetUnloggedRelationsInTablespaceDir("base", op, DEFAULTTABLESPACE_OID);
 
 	/*
 	 * Cycle through directories for all non-default tablespaces.
@@ -77,13 +78,16 @@ ResetUnloggedRelations(int op)
 
 	while ((spc_de = ReadDir(spc_dir, "pg_tblspc")) != NULL)
 	{
+		Oid			spcOid;
+
 		if (strcmp(spc_de->d_name, ".") == 0 ||
 			strcmp(spc_de->d_name, "..") == 0)
 			continue;
 
 		snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s",
 				 spc_de->d_name, TABLESPACE_VERSION_DIRECTORY);
-		ResetUnloggedRelationsInTablespaceDir(temp_path, op);
+		spcOid = atoi(spc_de->d_name);
+		ResetUnloggedRelationsInTablespaceDir(temp_path, op, spcOid);
 	}
 
 	FreeDir(spc_dir);
@@ -99,7 +103,8 @@ ResetUnloggedRelations(int op)
  * Process one tablespace directory for ResetUnloggedRelations
  */
 static void
-ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
+ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op,
+									  Oid spcOid)
 {
 	DIR		   *ts_dir;
 	struct dirent *de;
@@ -126,6 +131,8 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
 
 	while ((de = ReadDir(ts_dir, tsdirname)) != NULL)
 	{
+		Oid			dbOid;
+
 		/*
 		 * We're only interested in the per-database directories, which have
 		 * numeric names.  Note that this code will also (properly) ignore "."
@@ -134,9 +141,10 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
 		if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
 			continue;
 
+		dbOid = atoi(de->d_name);
 		snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
 				 tsdirname, de->d_name);
-		ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
+		ResetUnloggedRelationsInDbspaceDir(dbspace_path, op, spcOid, dbOid);
 	}
 
 	FreeDir(ts_dir);
@@ -146,7 +154,8 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
  * Process one per-dbspace directory for ResetUnloggedRelations
  */
 static void
-ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
+ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op,
+								   Oid spcOid, Oid dbOid)
 {
 	DIR		   *dbspace_dir;
 	struct dirent *de;
@@ -187,7 +196,7 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 
 			/* Skip anything that doesn't look like a relation data file. */
 			if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
-													 &forkNum))
+													 &forkNum, NULL))
 				continue;
 
 			/* Also skip it unless this is the init fork. */
@@ -229,7 +238,7 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 
 			/* Skip anything that doesn't look like a relation data file. */
 			if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
-													 &forkNum))
+													 &forkNum, NULL))
 				continue;
 
 			/* We never remove the init fork. */
@@ -279,13 +288,14 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 		{
 			ForkNumber	forkNum;
 			int			oidchars;
+			int			segment;
 			char		oidbuf[OIDCHARS + 1];
 			char		srcpath[MAXPGPATH * 2];
 			char		dstpath[MAXPGPATH];
 
 			/* Skip anything that doesn't look like a relation data file. */
 			if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
-													 &forkNum))
+													 &forkNum, &segment))
 				continue;
 
 			/* Also skip it unless this is the init fork. */
@@ -305,7 +315,13 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 
 			/* OK, we're ready to perform the actual copy. */
 			elog(DEBUG2, "copying %s to %s", srcpath, dstpath);
-			copy_file(srcpath, dstpath);
+			{
+				RelFileNode srcNode = {spcOid, dbOid, atol(oidbuf)};
+				RelFileNode dstNode = srcNode;
+
+				copy_file(srcpath, dstpath, &srcNode, &dstNode,
+						  INIT_FORKNUM, MAIN_FORKNUM, segment);
+			}
 		}
 
 		FreeDir(dbspace_dir);
@@ -327,7 +343,7 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 
 			/* Skip anything that doesn't look like a relation data file. */
 			if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
-													 &forkNum))
+													 &forkNum, NULL))
 				continue;
 
 			/* Also skip it unless this is the init fork. */
@@ -372,9 +388,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
  */
 bool
 parse_filename_for_nontemp_relation(const char *name, int *oidchars,
-									ForkNumber *fork)
+									ForkNumber *fork, int *segment)
 {
 	int			pos;
+	int			segstart = 0;
 
 	/* Look for a non-empty string of digits (that isn't too long). */
 	for (pos = 0; isdigit((unsigned char) name[pos]); ++pos)
@@ -401,6 +418,7 @@ parse_filename_for_nontemp_relation(const char *name, int *oidchars,
 	{
 		int			segchar;
 
+		segstart = pos + 1;
 		for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar)
 			;
 		if (segchar <= 1)
@@ -411,5 +429,14 @@ parse_filename_for_nontemp_relation(const char *name, int *oidchars,
 	/* Now we should be at the end. */
 	if (name[pos] != '\0')
 		return false;
+
+	if (segment != NULL)
+	{
+		if (segstart == 0)
+			*segment = 0;
+		else
+			*segment = atoi(name + segstart);
+	}
+
 	return true;
 }
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index 14bc61b8ad..ebc50320e1 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -17,6 +17,7 @@
 #include "access/htup_details.h"
 #include "access/itup.h"
 #include "access/xlog.h"
+#include "common/string.h"
 #include "pgstat.h"
 #include "storage/checksum.h"
 #include "utils/memdebug.h"
@@ -82,11 +83,8 @@ bool
 PageIsVerified(Page page, BlockNumber blkno)
 {
 	PageHeader	p = (PageHeader) page;
-	size_t	   *pagebytes;
-	int			i;
 	bool		checksum_failure = false;
 	bool		header_sane = false;
-	bool		all_zeroes = false;
 	uint16		checksum = 0;
 
 	/*
@@ -119,26 +117,8 @@ PageIsVerified(Page page, BlockNumber blkno)
 			return true;
 	}
 
-	/*
-	 * Check all-zeroes case. Luckily BLCKSZ is guaranteed to always be a
-	 * multiple of size_t - and it's much faster to compare memory using the
-	 * native word size.
-	 */
-	StaticAssertStmt(BLCKSZ == (BLCKSZ / sizeof(size_t)) * sizeof(size_t),
-					 "BLCKSZ has to be a multiple of sizeof(size_t)");
-
-	all_zeroes = true;
-	pagebytes = (size_t *) page;
-	for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++)
-	{
-		if (pagebytes[i] != 0)
-		{
-			all_zeroes = false;
-			break;
-		}
-	}
-
-	if (all_zeroes)
+	/* Check all-zeroes case */
+	if (IsAllZero((char *) page, BLCKSZ))
 		return true;
 
 	/*
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 61a8f11469..82265da74f 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -32,6 +32,7 @@
 #include "postmaster/bgwriter.h"
 #include "storage/fd.h"
 #include "storage/bufmgr.h"
+#include "storage/encryption.h"
 #include "storage/md.h"
 #include "storage/relfilenode.h"
 #include "storage/smgr.h"
@@ -86,6 +87,7 @@ typedef struct _MdfdVec
 
 static MemoryContext MdCxt;		/* context for all MdfdVec objects */
 
+static char *md_encryption_tweak;
 
 /* Populate a file tag describing an md.c segment file. */
 #define INIT_MD_FILETAG(a,xx_rnode,xx_forknum,xx_segno) \
@@ -139,6 +141,8 @@ static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno,
 static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum,
 		   MdfdVec *seg);
 
+static void mdencrypt(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer);
+static void mddecrypt(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer);
 
 /*
  *	mdinit() -- Initialize private state for magnetic disk storage manager.
@@ -149,6 +153,8 @@ mdinit(void)
 	MdCxt = AllocSetContextCreate(TopMemoryContext,
 								  "MdSmgr",
 								  ALLOCSET_DEFAULT_SIZES);
+
+	md_encryption_tweak = MemoryContextAllocZero(MdCxt, TWEAK_SIZE);
 }
 
 /*
@@ -401,6 +407,12 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
 	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
 
+	if (data_encrypted)
+	{
+		mdencrypt(reln, forknum, blocknum, buffer);
+		buffer = encrypt_buf.data;
+	}
+
 	if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
 	{
 		if (nbytes < 0)
@@ -587,6 +599,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	off_t		seekpos;
 	int			nbytes;
 	MdfdVec    *v;
+	char	   *buffer_read = buffer;
 
 	TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
 										reln->smgr_rnode.node.spcNode,
@@ -601,7 +614,10 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
 	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
 
-	nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);
+	if (data_encrypted)
+		buffer_read = encrypt_buf.data;
+
+	nbytes = FileRead(v->mdfd_vfd, buffer_read, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);
 
 	TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
 									   reln->smgr_rnode.node.spcNode,
@@ -636,6 +652,8 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 							blocknum, FilePathName(v->mdfd_vfd),
 							nbytes, BLCKSZ)));
 	}
+	else if (data_encrypted)
+		mddecrypt(reln, forknum, blocknum, buffer);
 }
 
 /*
@@ -671,6 +689,11 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
 	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
 
+	if (data_encrypted)
+	{
+		mdencrypt(reln, forknum, blocknum, buffer);
+		buffer = encrypt_buf.data;
+	}
 	nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_WRITE);
 
 	TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
@@ -1248,6 +1271,22 @@ _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 	return (BlockNumber) (len / BLCKSZ);
 }
 
+/* Block encryption helpers: the ciphertext lives in encrypt_buf, the tweak comes from mdtweak(). */
+static void
+mdencrypt(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer)
+{
+	mdtweak(md_encryption_tweak, &(reln->smgr_rnode.node), forknum, blocknum);
+	encrypt_block(buffer, encrypt_buf.data, BLCKSZ, md_encryption_tweak,
+				  false);
+}
+
+static void
+mddecrypt(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *dest)
+{
+	mdtweak(md_encryption_tweak, &(reln->smgr_rnode.node), forknum, blocknum);
+	decrypt_block(encrypt_buf.data, dest, BLCKSZ, md_encryption_tweak, false);
+}
+
 /*
  * Sync a file to disk, given a file tag.  Write the path into an output
  * buffer so the caller can use it in error messages.
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 44a59e1d4f..40459168a4 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -63,6 +63,7 @@
 #include "replication/walsender.h"
 #include "rewrite/rewriteHandler.h"
 #include "storage/bufmgr.h"
+#include "storage/encryption.h"
 #include "storage/ipc.h"
 #include "storage/proc.h"
 #include "storage/procsignal.h"
@@ -3497,7 +3498,7 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx,
 	 * postmaster/postmaster.c (the option sets should not conflict) and with
 	 * the common help() function in main/main.c.
 	 */
-	while ((flag = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:v:W:-:")) != -1)
+	while ((flag = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijK:k:lN:nOo:Pp:r:S:sTt:v:W:-:")) != -1)
 	{
 		switch (flag)
 		{
@@ -3555,6 +3556,12 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx,
 					UseSemiNewlineNewline = true;
 				break;
 
+#ifdef	USE_ENCRYPTION
+			case 'K':
+				encryption_key_command = strdup(optarg);
+				break;
+#endif							/* USE_ENCRYPTION */
+
 			case 'k':
 				SetConfigOption("unix_socket_directories", optarg, ctx, gucsource);
 				break;
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index 83c9514856..053e2a14cd 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -1514,6 +1514,7 @@ ValidatePgVersion(const char *path)
  * GUC variables: lists of library names to be preloaded at postmaster
  * start and at backend start
  */
+char	   *encryption_library_string = NULL;
 char	   *session_preload_libraries_string = NULL;
 char	   *shared_preload_libraries_string = NULL;
 char	   *local_preload_libraries_string = NULL;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index e24f003983..31b87c7097 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -72,6 +72,7 @@
 #include "replication/walsender.h"
 #include "storage/bufmgr.h"
 #include "storage/dsm_impl.h"
+#include "storage/encryption.h"
 #include "storage/standby.h"
 #include "storage/fd.h"
 #include "storage/large_object.h"
@@ -172,6 +173,7 @@ static void assign_session_replication_role(int newval, void *extra);
 static bool check_temp_buffers(int *newval, void **extra, GucSource source);
 static bool check_bonjour(bool *newval, void **extra, GucSource source);
 static bool check_ssl(bool *newval, void **extra, GucSource source);
+static bool check_full_page_writes(bool *newval, void **extra, GucSource source);
 static bool check_stage_log_stats(bool *newval, void **extra, GucSource source);
 static bool check_log_stats(bool *newval, void **extra, GucSource source);
 static bool check_canonical_path(char **newval, void **extra, GucSource source);
@@ -216,6 +218,10 @@ static void assign_recovery_target_lsn(const char *newval, void *extra);
 static bool check_primary_slot_name(char **newval, void **extra, GucSource source);
 static bool check_default_with_oids(bool *newval, void **extra, GucSource source);
 
+#ifdef USE_ENCRYPTION
+static const char *show_encryption_key_command(void);
+#endif							/* USE_ENCRYPTION */
+
 /* Private functions in guc-file.l that need to be called from guc.c */
 static ConfigVariable *ProcessConfigFileInternal(GucContext context,
 						  bool applySettings, int elevel);
@@ -1173,7 +1179,7 @@ static struct config_bool ConfigureNamesBool[] =
 		},
 		&fullPageWrites,
 		true,
-		NULL, NULL, NULL
+		check_full_page_writes, NULL, NULL
 	},
 
 	{
@@ -1837,6 +1843,17 @@ static struct config_bool ConfigureNamesBool[] =
 	},
 
 	{
+		{"data_encryption", PGC_INTERNAL, PRESET_OPTIONS,
+			gettext_noop("Shows whether data encryption is turned on for this cluster."),
+			NULL,
+			GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE
+		},
+		&data_encrypted,
+		false,
+		NULL, NULL, NULL
+	},
+
+	{
 		{"syslog_sequence_numbers", PGC_SIGHUP, LOGGING_WHERE,
 			gettext_noop("Add sequence number to syslog messages to avoid duplicate suppression."),
 			NULL
@@ -4213,6 +4230,19 @@ static struct config_string ConfigureNamesString[] =
 		NULL, NULL, NULL
 	},
 
+#ifdef	USE_ENCRYPTION
+	{
+		/* No GUC_IS_NAME here: it would truncate the command to NAMEDATALEN-1. */
+		{"encryption_key_command", PGC_POSTMASTER, 0,
+			gettext_noop("Sets the shell command that will be called to fetch database encryption key."),
+			NULL, GUC_NOT_IN_SAMPLE | GUC_SUPERUSER_ONLY
+		},
+		&encryption_key_command,
+		NULL,
+		NULL, NULL, show_encryption_key_command
+	},
+#endif							/* USE_ENCRYPTION */
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, NULL, NULL, NULL, NULL
@@ -11094,6 +11124,19 @@ check_ssl(bool *newval, void **extra, GucSource source)
 }
 
 static bool
+check_full_page_writes(bool *newval, void **extra, GucSource source)
+{
+	/*
+	 * Refuse to turn full_page_writes off while the cluster is encrypted.
+	 */
+	if (*newval || !data_encrypted)
+		return true;
+
+	GUC_check_errdetail("Cannot disable parameter when the cluster is encrypted.");
+	return false;
+}
+
+static bool
 check_stage_log_stats(bool *newval, void **extra, GucSource source)
 {
 	if (*newval && log_statement_stats)
@@ -11751,4 +11794,15 @@ check_default_with_oids(bool *newval, void **extra, GucSource source)
 	return true;
 }
 
+#ifdef USE_ENCRYPTION
+/*
+ * GUC show hook: report "(disabled)" while no key command is configured.
+ */
+static const char *
+show_encryption_key_command(void)
+{
+	return encryption_key_command ? encryption_key_command : "(disabled)";
+}
+#endif							/* USE_ENCRYPTION */
+
 #include "guc-file.c"
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 0c905bb55d..e0580ea69f 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -90,6 +90,7 @@
 #authentication_timeout = 1min		# 1s-600s
 #password_encryption = md5		# md5 or scram-sha-256
 #db_user_namespace = off
+#encryption_key_command = ''
 
 # GSSAPI using Kerberos
 #krb_server_keyfile = ''
diff --git a/src/bin/Makefile b/src/bin/Makefile
index 903e58121f..d23dbeda55 100644
--- a/src/bin/Makefile
+++ b/src/bin/Makefile
@@ -22,6 +22,7 @@ SUBDIRS = \
 	pg_controldata \
 	pg_ctl \
 	pg_dump \
+	pg_keysetup \
 	pg_resetwal \
 	pg_rewind \
 	pg_test_fsync \
diff --git a/src/bin/initdb/Makefile b/src/bin/initdb/Makefile
index 7c404430a9..b72de55d62 100644
--- a/src/bin/initdb/Makefile
+++ b/src/bin/initdb/Makefile
@@ -26,7 +26,7 @@ ifneq (,$(with_system_tzdata))
 override CPPFLAGS += '-DSYSTEMTZDIR="$(with_system_tzdata)"'
 endif
 
-OBJS=	initdb.o findtimezone.o localtime.o encnames.o $(WIN32RES)
+OBJS=	initdb.o findtimezone.o localtime.o encnames.o encryption.o $(WIN32RES)
 
 all: initdb
 
@@ -45,6 +45,9 @@ encnames.c: % : $(top_srcdir)/src/backend/utils/mb/%
 localtime.c: % : $(top_srcdir)/src/timezone/%
 	rm -f $@ && $(LN_S) $< .
 
+encryption.c: % : $(top_srcdir)/src/backend/storage/file/%
+	rm -f $@ && $(LN_S) $< .
+
 install: all installdirs
 	$(INSTALL_PROGRAM) initdb$(X) '$(DESTDIR)$(bindir)/initdb$(X)'
 
@@ -55,7 +58,7 @@ uninstall:
 	rm -f '$(DESTDIR)$(bindir)/initdb$(X)'
 
 clean distclean maintainer-clean:
-	rm -f initdb$(X) $(OBJS) encnames.c localtime.c
+	rm -f initdb$(X) $(OBJS) encnames.c localtime.c encryption.c
 	rm -rf tmp_check
 
 # ensure that changes in datadir propagate into object file
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 176bd50075..ad9950e334 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -69,6 +69,9 @@
 #include "common/username.h"
 #include "fe_utils/logging.h"
 #include "fe_utils/string_utils.h"
+#include "storage/encryption.h"
+#include "lib/ilist.h"
+#include "mb/pg_wchar.h"
 #include "getaddrinfo.h"
 #include "getopt_long.h"
 #include "mb/pg_wchar.h"
@@ -78,6 +81,12 @@
 /* Ideally this would be in a .h file, but it hardly seems worth the trouble */
 extern const char *select_default_timezone(const char *share_path);
 
+typedef struct
+{
+	dlist_node	list_node;		/* link in the extra_options list */
+	char	   *value;			/* "key=value\n" line for postgresql.conf */
+}			extra_option;
+
 static const char *const auth_methods_host[] = {
 	"trust", "reject", "scram-sha-256", "md5", "password", "ident", "radius",
 #ifdef ENABLE_GSS
@@ -142,9 +151,11 @@ static bool do_sync = true;
 static bool sync_only = false;
 static bool show_setting = false;
 static bool data_checksums = false;
+static char *encr_key_cmd_str = NULL;
 static char *xlog_dir = NULL;
 static char *str_wal_segment_size_mb = NULL;
 static int	wal_segment_size_mb;
+static dlist_head extra_options = DLIST_STATIC_INIT(extra_options);
 
 
 /* internal vars */
@@ -528,6 +539,7 @@ readfile(const char *path)
 
 	fclose(infile);
 	free(buffer);
+
 	result[n] = NULL;
 
 	return result;
@@ -1072,6 +1084,39 @@ pretty_wal_size(int segment_count)
 	return result;
 }
 
+/*
+ * Append the lines collected in extra_options to the NULL-terminated array
+ * *conflines, replacing it with a newly allocated, larger array.
+ */
+static void
+append_extra_options(char ***conflines)
+{
+	char	  **old_lines = *conflines;
+	char	  **merged;
+	dlist_iter	it;
+	int			old_count = 0;
+	int			extra_count = 0;
+	int			pos;
+
+	while (old_lines[old_count] != NULL)
+		old_count++;
+	dlist_foreach(it, &extra_options)
+		extra_count++;
+
+	merged = (char **) pg_malloc((old_count + extra_count + 1) * sizeof(char *));
+	memcpy(merged, old_lines, old_count * sizeof(char *));
+
+	/* The option strings are shared with the list, not copied. */
+	pos = old_count;
+	dlist_foreach(it, &extra_options)
+		merged[pos++] = dlist_container(extra_option, list_node, it.cur)->value;
+	merged[pos] = NULL;
+
+	pg_free(old_lines);
+	*conflines = merged;
+}
+
+
 /*
  * set up all the config files
  */
@@ -1090,6 +1135,8 @@ setup_config(void)
 
 	conflines = readfile(conf_file);
 
+	append_extra_options(&conflines);
+
 	snprintf(repltok, sizeof(repltok), "max_connections = %d", n_connections);
 	conflines = replace_token(conflines, "#max_connections = 100", repltok);
 
@@ -1231,6 +1278,14 @@ setup_config(void)
 								  "log_file_mode = 0640");
 	}
 
+	if (encryption_key_command)
+	{
+		/* Escape the command so embedded quotes cannot break the setting. */
+		snprintf(repltok, sizeof(repltok), "encryption_key_command = '%s'",
+				 escape_quotes(encryption_key_command));
+		conflines = replace_token(conflines, "#encryption_key_command = ''", repltok);
+	}
+
 	snprintf(path, sizeof(path), "%s/postgresql.conf", pg_data);
 
 	writefile(path, conflines);
@@ -1444,11 +1499,27 @@ bootstrap_template1(void)
 	/* Also ensure backend isn't confused by this environment var: */
 	unsetenv("PGCLIENTENCODING");
 
+	/* Prepare the -K option for the backend, quoted so the shell keeps it whole. */
+	if (encryption_key_command)
+	{
+		size_t		len;
+
+		len = 5 + strlen(encryption_key_command) + 1;
+		encr_key_cmd_str = (char *) pg_malloc(len);
+		snprintf(encr_key_cmd_str, len, "-K \"%s\"", encryption_key_command);
+	}
+	else
+	{
+		encr_key_cmd_str = (char *) pg_malloc(1);
+		encr_key_cmd_str[0] = '\0';
+	}
+
 	snprintf(cmd, sizeof(cmd),
-			 "\"%s\" --boot -x1 -X %u %s %s %s",
+			 "\"%s\" --boot -x1 -X %u %s %s %s %s",
 			 backend_exec,
 			 wal_segment_size_mb * (1024 * 1024),
 			 data_checksums ? "-k" : "",
+			 encr_key_cmd_str,
 			 boot_options,
 			 debug ? "-d 5" : "");
 
@@ -2376,6 +2447,10 @@ usage(const char *progname)
 	printf(_("\nLess commonly used options:\n"));
 	printf(_("  -d, --debug               generate lots of debugging output\n"));
 	printf(_("  -k, --data-checksums      use data page checksums\n"));
+#ifdef	USE_ENCRYPTION
+	printf(_("  -K, --encryption-key-command\n"
+			 "                            command that returns encryption key\n"));
+#endif							/* USE_ENCRYPTION */
 	printf(_("  -L DIRECTORY              where to find the input files\n"));
 	printf(_("  -n, --no-clean            do not clean up after errors\n"));
 	printf(_("  -N, --no-sync             do not wait for changes to be written safely to disk\n"));
@@ -2479,7 +2554,6 @@ setup_pgdata(void)
 	putenv(pgdata_set_env);
 }
 
-
 void
 setup_bin_paths(const char *argv0)
 {
@@ -2978,8 +3052,8 @@ initialize_data_directory(void)
 	fflush(stdout);
 
 	snprintf(cmd, sizeof(cmd),
-			 "\"%s\" %s template1 >%s",
-			 backend_exec, backend_options,
+			 "\"%s\" %s %s template1 >%s",
+			 backend_exec, backend_options, encr_key_cmd_str,
 			 DEVNULL);
 
 	PG_CMD_OPEN;
@@ -3018,6 +3092,23 @@ initialize_data_directory(void)
 	check_ok();
 }
 
+/* Remember a "key=value" option; it is appended to postgresql.conf later. */
+static void
+parse_extra_option_arg(char *optarg)
+{
+	extra_option *opt;
+
+	if (!strchr(optarg, '='))
+	{
+		fprintf(stderr, _("Option value is not in key=value format\n"));
+		exit(1);
+	}
+
+	/* pg_malloc() and psprintf() exit on out-of-memory by themselves. */
+	opt = (extra_option *) pg_malloc(sizeof(extra_option));
+	opt->value = psprintf("%s\n", optarg);
+	dlist_push_tail(&extra_options, &opt->list_node);
+}
 
 int
 main(int argc, char *argv[])
@@ -3052,6 +3143,9 @@ main(int argc, char *argv[])
 		{"waldir", required_argument, NULL, 'X'},
 		{"wal-segsize", required_argument, NULL, 12},
 		{"data-checksums", no_argument, NULL, 'k'},
+#ifdef	USE_ENCRYPTION
+		{"encryption-key-command", required_argument, NULL, 'K'},
+#endif							/* USE_ENCRYPTION */
 		{"allow-group-access", no_argument, NULL, 'g'},
 		{NULL, 0, NULL, 0}
 	};
@@ -3094,7 +3188,7 @@ main(int argc, char *argv[])
 
 	/* process command-line options */
 
-	while ((c = getopt_long(argc, argv, "dD:E:kL:nNU:WA:sST:X:g", long_options, &option_index)) != -1)
+	while ((c = getopt_long(argc, argv, "c:dD:E:kK:L:nNU:WA:sST:X:g", long_options, &option_index)) != -1)
 	{
 		switch (c)
 		{
@@ -3146,6 +3240,14 @@ main(int argc, char *argv[])
 			case 'k':
 				data_checksums = true;
 				break;
+#ifdef	USE_ENCRYPTION
+			case 'K':
+				encryption_key_command = pg_strdup(optarg);
+				break;
+#endif							/* USE_ENCRYPTION */
+			case 'c':
+				parse_extra_option_arg(optarg);
+				break;
 			case 'L':
 				share_path = pg_strdup(optarg);
 				break;
@@ -3316,7 +3418,10 @@ main(int argc, char *argv[])
 	if (pwprompt || pwfilename)
 		get_su_pwd();
 
-	printf("\n");
+	if (encryption_key_command)
+		printf(_("Data encryption is enabled.\n"));
+	else
+		printf(_("Data encryption is disabled.\n"));
 
 	initialize_data_directory();
 
diff --git a/src/bin/pg_checksums/Makefile b/src/bin/pg_checksums/Makefile
index 13a25f5e33..90f12d233f 100644
--- a/src/bin/pg_checksums/Makefile
+++ b/src/bin/pg_checksums/Makefile
@@ -17,13 +17,18 @@ include $(top_builddir)/src/Makefile.global
 
 LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils
 
-OBJS= pg_checksums.o $(WIN32RES)
+OBJS= encryption.o pg_checksums.o $(WIN32RES)
+
+override CPPFLAGS := -DFRONTEND $(CPPFLAGS)
 
 all: pg_checksums
 
 pg_checksums: $(OBJS) | submake-libpgport submake-libpgfeutils
 	$(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
 
+encryption.c: % : $(top_srcdir)/src/backend/storage/file/%
+	rm -f $@ && $(LN_S) $< .
+
 install: all installdirs
 	$(INSTALL_PROGRAM) pg_checksums$(X) '$(DESTDIR)$(bindir)/pg_checksums$(X)'
 
@@ -36,6 +41,7 @@ uninstall:
 clean distclean maintainer-clean:
 	rm -f pg_checksums$(X) $(OBJS)
 	rm -rf tmp_check
+	rm -f encryption.c
 
 check:
 	$(prove_check)
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index bc89982658..0cf49eadcd 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -20,6 +20,7 @@
 #include <unistd.h>
 
 #include "access/xlog_internal.h"
+#include "catalog/pg_tablespace_d.h"
 #include "common/controldata_utils.h"
 #include "common/file_perm.h"
 #include "common/file_utils.h"
@@ -29,6 +30,7 @@
 #include "storage/bufpage.h"
 #include "storage/checksum.h"
 #include "storage/checksum_impl.h"
+#include "storage/encryption.h"
 
 
 static int64 files = 0;
@@ -80,6 +82,10 @@ usage(void)
 	printf(_("  -c, --check            check data checksums (default)\n"));
 	printf(_("  -d, --disable          disable data checksums\n"));
 	printf(_("  -e, --enable           enable data checksums\n"));
+#ifdef	USE_ENCRYPTION
+	printf(_("  -K, --encryption-key-command=COMMAND\n"
+			 "                         command that returns encryption key\n"));
+#endif							/* USE_ENCRYPTION */
 	printf(_("  -N, --no-sync          do not wait for changes to be written safely to disk\n"));
 	printf(_("  -P, --progress         show progress information\n"));
 	printf(_("  -v, --verbose          output verbose messages\n"));
@@ -167,7 +173,8 @@ skipfile(const char *fn)
 }
 
 static void
-scan_file(const char *fn, BlockNumber segmentno)
+scan_file(const char *fn, Oid relnode, BlockNumber segmentno,
+		  ForkNumber forkno, Oid tbspace, Oid db)
 {
 	PGAlignedBlock buf;
 	PageHeader	header = (PageHeader) buf.data;
@@ -193,6 +200,7 @@ scan_file(const char *fn, BlockNumber segmentno)
 	{
 		uint16		csum;
 		int			r = read(f, buf.data, BLCKSZ);
+		char		tweak[TWEAK_SIZE];
 
 		if (r == 0)
 			break;
@@ -204,6 +212,18 @@ scan_file(const char *fn, BlockNumber segmentno)
 		}
 		blocks++;
 
+		if (data_encrypted)
+		{
+			RelFileNode node;
+
+			node.spcNode = tbspace;
+			node.dbNode = db;
+			node.relNode = relnode;
+			/* NOTE(review): blockno is assumed segment-relative; the tweak needs the relation-wide number. */
+			mdtweak(tweak, &node, forkno, blockno + segmentno * RELSEG_SIZE);
+			decrypt_block(buf.data, buf.data, BLCKSZ, tweak, false);
+		}
+
 		/* New pages have no checksum yet */
 		if (PageIsNew(header))
 			continue;
@@ -232,6 +252,9 @@ scan_file(const char *fn, BlockNumber segmentno)
 				exit(1);
 			}
 
+			if (data_encrypted)
+				encrypt_block(buf.data, buf.data, BLCKSZ, tweak, false);
+
 			/* Write block with checksum */
 			if (write(f, buf.data, BLCKSZ) != BLCKSZ)
 			{
@@ -262,9 +285,13 @@ scan_file(const char *fn, BlockNumber segmentno)
  * all the items which have checksums is computed and returned back
  * to the caller without operating on the files.  This is used to compile
  * the total size of the data directory for progress reports.
+ *
+ * If db is a valid pointer, *db contains database OID. If it's NULL, the
+ * database OID needs to be recognized, possibly by recursive call.
  */
 static int64
-scan_directory(const char *basedir, const char *subdir, bool sizeonly)
+scan_directory(const char *basedir, const char *subdir, bool sizeonly,
+			   Oid tbspace, Oid *db)
 {
 	int64		dirsize = 0;
 	char		path[MAXPGPATH];
@@ -282,6 +309,9 @@ scan_directory(const char *basedir, const char *subdir, bool sizeonly)
 	{
 		char		fn[MAXPGPATH];
 		struct stat st;
+		bool		tbsp_identified = false;
+		Oid			database;
+		bool		db_identified = false;
 
 		if (strcmp(de->d_name, ".") == 0 ||
 			strcmp(de->d_name, "..") == 0)
@@ -305,12 +335,58 @@ scan_directory(const char *basedir, const char *subdir, bool sizeonly)
 			pg_log_error("could not stat file \"%s\": %m", fn);
 			exit(1);
 		}
+
+		if (tbspace != InvalidOid && db == NULL)
+		{
+			/*
+			 * If tablespace is passed by caller or identified by upper call
+			 * of this function, and if caller could not identify the database
+			 * OID, try to do it now.  OIDs are unsigned, hence strtoul().
+			 */
+			if (strcmp(de->d_name, TABLESPACE_VERSION_DIRECTORY) == 0)
+			{
+				/*
+				 * Major-version-specific tablespace subdirectory needs no
+				 * special attention, we'll recurse into it below.
+				 */
+			}
+			else
+			{
+				errno = 0;
+				database = strtoul(de->d_name, NULL, 10);
+				if (errno != 0 || database == InvalidOid)
+				{
+					fprintf(stderr, _("%s: invalid database oid \"%s\"\n"),
+							progname, de->d_name);
+					exit(1);
+				}
+				db_identified = true;
+			}
+		}
+		else if (tbspace == InvalidOid)
+		{
+			/*
+			 * This entry should be a direct subdirectory of pg_tblspc, so the
+			 * name is supposedly tablespace oid (unsigned, hence strtoul()).
+			 */
+			errno = 0;
+			tbspace = strtoul(de->d_name, NULL, 10);
+			if (errno != 0 || tbspace == InvalidOid)
+			{
+				fprintf(stderr, _("%s: invalid tablespace oid \"%s\"\n"),
+						progname, de->d_name);
+				exit(1);
+			}
+			tbsp_identified = true;
+		}
+
 		if (S_ISREG(st.st_mode))
 		{
 			char		fnonly[MAXPGPATH];
 			char	   *forkpath,
 					   *segmentpath;
 			BlockNumber segmentno = 0;
+			ForkNumber	forkno;
 
 			if (skipfile(de->d_name))
 				continue;
@@ -337,8 +413,14 @@ scan_directory(const char *basedir, const char *subdir, bool sizeonly)
 
 			forkpath = strchr(fnonly, '_');
 			if (forkpath != NULL)
+			{
 				*forkpath++ = '\0';
 
+				forkno = forkname_to_number(forkpath);
+			}
+			else
+				forkno = MAIN_FORKNUM;
+
 			if (only_relfilenode && strcmp(only_relfilenode, fnonly) != 0)
 				/* Relfilenode not to be included */
 				continue;
@@ -350,14 +432,31 @@ scan_directory(const char *basedir, const char *subdir, bool sizeonly)
 			 * the items in the data folder.
 			 */
 			if (!sizeonly)
-				scan_file(fn, segmentno);
+			{
+				Oid			relnode = atooid(fnonly);
+				/* NULL db: file is not inside a database directory */
+				scan_file(fn, relnode, segmentno, forkno, tbspace, db ? *db : InvalidOid);
+			}
 		}
 #ifndef WIN32
 		else if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
 #else
 		else if (S_ISDIR(st.st_mode) || pgwin32_is_junction(fn))
 #endif
-			dirsize += scan_directory(path, de->d_name, sizeonly);
+
+			/*
+			 * If database OID is not passed by caller, use the one we
+			 * identified ourselves.  Sizes of subdirectories accumulate.
+			 */
+			dirsize += scan_directory(path, de->d_name, sizeonly, tbspace,
+									  db_identified ? &database : db);
+
+		/*
+		 * If tablespace is not passed by caller, forget what we found out so
+		 * that next directory entry is examined in the same way.
+		 */
+		if (tbsp_identified)
+			tbspace = InvalidOid;
 	}
 	closedir(dir);
 	return dirsize;
@@ -371,6 +470,7 @@ main(int argc, char *argv[])
 		{"pgdata", required_argument, NULL, 'D'},
 		{"disable", no_argument, NULL, 'd'},
 		{"enable", no_argument, NULL, 'e'},
+		{"encryption-key-command", required_argument, NULL, 'K'},
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"verbose", no_argument, NULL, 'v'},
@@ -400,7 +500,7 @@ main(int argc, char *argv[])
 		}
 	}
 
-	while ((c = getopt_long(argc, argv, "cD:deNPr:v", long_options, &option_index)) != -1)
+	while ((c = getopt_long(argc, argv, "cD:deNK:Pr:v", long_options, &option_index)) != -1)
 	{
 		switch (c)
 		{
@@ -413,6 +513,12 @@ main(int argc, char *argv[])
 			case 'e':
 				mode = PG_MODE_ENABLE;
 				break;
+#ifdef	USE_ENCRYPTION
+			case 'K':
+				encryption_key_command = pg_strdup(optarg);
+				data_encrypted = true;
+				break;
+#endif							/* USE_ENCRYPTION */
 			case 'N':
 				do_sync = false;
 				break;
@@ -482,6 +588,23 @@ main(int argc, char *argv[])
 		exit(1);
 	}
 
+	if (ControlFile->data_cipher > PG_CIPHER_NONE)
+	{
+		if (encryption_key_command == NULL)
+		{
+#ifdef USE_ENCRYPTION
+			fprintf(stderr, _("%s: please specify command to retrieve encryption key\n"),
+					progname);
+#else
+			fprintf(stderr, _("%s: compile postgres with --with-openssl to use encryption\n"),
+					progname);
+#endif							/* USE_ENCRYPTION */
+			exit(1);
+		}
+
+		setup_encryption(false, DataDir);
+	}
+
 	if (ControlFile->pg_control_version != PG_CONTROL_VERSION)
 	{
 		pg_log_error("cluster is not compatible with this version of pg_checksums");
@@ -527,6 +650,8 @@ main(int argc, char *argv[])
 	/* Operate on all files if checking or enabling checksums */
 	if (mode == PG_MODE_CHECK || mode == PG_MODE_ENABLE)
 	{
+		Oid			db_shared = 0;
+
 		/*
 		 * If progress status information is requested, we need to scan the
 		 * directory tree twice: once to know how much total data needs to be
@@ -534,14 +659,19 @@ main(int argc, char *argv[])
 		 */
 		if (showprogress)
 		{
-			total_size = scan_directory(DataDir, "global", true);
-			total_size += scan_directory(DataDir, "base", true);
-			total_size += scan_directory(DataDir, "pg_tblspc", true);
+			total_size = scan_directory(DataDir, "global", true,
+										GLOBALTABLESPACE_OID, &db_shared);
+			total_size += scan_directory(DataDir, "base", true,
+										 DEFAULTTABLESPACE_OID, NULL);
+			total_size += scan_directory(DataDir, "pg_tblspc", true,
+										 InvalidOid, NULL);
 		}
 
-		(void) scan_directory(DataDir, "global", false);
-		(void) scan_directory(DataDir, "base", false);
-		(void) scan_directory(DataDir, "pg_tblspc", false);
+		(void) scan_directory(DataDir, "global", false, GLOBALTABLESPACE_OID,
+							  &db_shared);
+		(void) scan_directory(DataDir, "base", false, DEFAULTTABLESPACE_OID,
+							  NULL);
+		(void) scan_directory(DataDir, "pg_tblspc", false, InvalidOid, NULL);
 
 		if (showprogress)
 		{
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index a674f52f0b..f39dfb50e8 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -344,5 +344,14 @@ main(int argc, char *argv[])
 		   ControlFile->data_checksum_version);
 	printf(_("Mock authentication nonce:            %s\n"),
 		   mock_auth_nonce_str);
+	printf(_("Data encryption:                      %s\n"),
+		   ControlFile->data_cipher > PG_CIPHER_NONE ? _("on") : _("off"));
+	if (ControlFile->data_cipher > PG_CIPHER_NONE)
+		printf(_("Data encryption fingerprint:          %08X%08X%08X%08X\n"),
+			   htonl(((uint32 *) ControlFile->encryption_verification)[0]),
+			   htonl(((uint32 *) ControlFile->encryption_verification)[1]),
+			   htonl(((uint32 *) ControlFile->encryption_verification)[2]),
+			   htonl(((uint32 *) ControlFile->encryption_verification)[3])
+			);
 	return 0;
 }
diff --git a/src/bin/pg_keysetup/Makefile b/src/bin/pg_keysetup/Makefile
new file mode 100644
index 0000000000..92ef5034ee
--- /dev/null
+++ b/src/bin/pg_keysetup/Makefile
@@ -0,0 +1,32 @@
+# src/bin/pg_keysetup/Makefile
+
+PGFILEDESC = "pg_keysetup - generate encryption key from a password"
+PGAPPICON=win32
+
+subdir = src/bin/pg_keysetup
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = pg_keysetup.o $(RMGRDESCOBJS) encryption.o $(WIN32RES)
+
+override CPPFLAGS := -DFRONTEND $(CPPFLAGS)
+
+all: pg_keysetup
+
+pg_keysetup: $(OBJS) | submake-libpgport
+	$(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
+
+encryption.c: % : $(top_srcdir)/src/backend/storage/file/%
+	rm -f $@ && $(LN_S) $< .
+
+install: all installdirs
+	$(INSTALL_PROGRAM) pg_keysetup$(X) '$(DESTDIR)$(bindir)/pg_keysetup$(X)'
+
+installdirs:
+	$(MKDIR_P) '$(DESTDIR)$(bindir)'
+
+uninstall:
+	rm -f '$(DESTDIR)$(bindir)/pg_keysetup$(X)'
+
+clean distclean maintainer-clean:
+	rm -f pg_keysetup$(X) $(OBJS) encryption.c
diff --git a/src/bin/pg_keysetup/pg_keysetup.c b/src/bin/pg_keysetup/pg_keysetup.c
new file mode 100644
index 0000000000..99891e6a69
--- /dev/null
+++ b/src/bin/pg_keysetup/pg_keysetup.c
@@ -0,0 +1,166 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_keysetup.c - Turn password into encryption key.
+ *
+ * Copyright (c) 2013-2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		  src/bin/pg_keysetup/pg_keysetup.c
+ *-------------------------------------------------------------------------
+ */
+
+#define FRONTEND 1
+#include "postgres.h"
+
+#include <dirent.h>
+#include <unistd.h>
+
+#include "port/pg_crc32c.h"
+#include "storage/encryption.h"
+#include "getopt_long.h"
+
+#ifdef USE_ENCRYPTION
+static const char *progname;
+
+/* Print command-line help for pg_keysetup to stdout. */
+static void
+usage(const char *progname)
+{
+	printf(_("%s derives encryption key from a password.\n\n"), progname);
+	printf(_("Usage:\n  %s [OPTION]...\n\n"), progname);
+	printf(_("Options:\n"));
+	printf(_(" [-D] DATADIR            data directory\n"));
+	printf(_("  -V, --version          output version information, then exit\n"));
+	printf(_("  -?, --help             show this help, then exit\n\n"));
+	printf(_("Password is read from stdin and the key is sent to stdout\n"));
+}
+
+/*
+ * Report an unrecoverable error on stderr and terminate the program.
+ * "fmt" and the following arguments are printf-style; the message is
+ * prefixed with the program name and followed by a newline.
+ */
+static void
+fatal_error(const char *fmt,...)
+{
+	va_list		ap;
+
+	/* Flush pending stdout output before anything is written to stderr. */
+	fflush(stdout);
+	fprintf(stderr, _("%s: FATAL:  "), progname);
+	va_start(ap, fmt);
+	vfprintf(stderr, _(fmt), ap);
+	va_end(ap);
+	fputc('\n', stderr);
+	exit(EXIT_FAILURE);
+}
+#endif							/* USE_ENCRYPTION */
+
+/* Read a password from stdin and print the key derived from it. */
+int
+main(int argc, char **argv)
+{
+/*
+ * If no encryption library is linked, let the utility fail immediately. It'd
+ * be weird if we reported incorrect usage just to say later that no useful
+ * work can be done anyway.
+ */
+#ifdef USE_ENCRYPTION
+	int			c;
+	char	   *DataDir = NULL;
+	char		password[ENCRYPTION_PWD_MAX_LENGTH];
+	size_t		pwd_len,
+				i;
+
+	progname = get_progname(argv[0]);
+
+	if (argc > 1)
+	{
+		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
+		{
+			usage(progname);
+			exit(0);
+		}
+		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
+		{
+			puts("pg_keysetup (PostgreSQL) " PG_VERSION);
+			exit(0);
+		}
+	}
+
+	while ((c = getopt(argc, argv, "D:")) != -1)
+	{
+		switch (c)
+		{
+			case 'D':
+				DataDir = optarg;
+				break;
+
+			default:
+				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+				exit(1);
+		}
+	}
+
+	if (DataDir == NULL)
+	{
+		if (optind < argc)
+			DataDir = argv[optind++];
+		else
+			DataDir = getenv("PGDATA");
+	}
+
+	/* Complain if any arguments remain */
+	if (optind < argc)
+	{
+		fprintf(stderr, _("%s: too many command-line arguments (first is \"%s\")\n"),
+				progname, argv[optind]);
+		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+				progname);
+		exit(1);
+	}
+
+	if (DataDir == NULL)
+	{
+		fprintf(stderr, _("%s: no data directory specified\n"), progname);
+		fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+		exit(1);
+	}
+
+	read_kdf_file(DataDir);
+
+	/*
+	 * Read the password: one line from stdin, without the trailing newline.
+	 */
+	pwd_len = 0;
+	while (true)
+	{
+		int			ch = getchar();
+
+		if (ch == EOF || ch == '\n')
+			break;
+
+		if (pwd_len >= ENCRYPTION_PWD_MAX_LENGTH)
+			fatal_error("The password is too long");
+
+		password[pwd_len++] = ch;
+	}
+
+	if (pwd_len < ENCRYPTION_PWD_MIN_LENGTH)
+		fatal_error("The password is too short");
+
+	/*
+	 * Run the key derivation function. The password buffer is not
+	 * NUL-terminated, so its length is passed explicitly.
+	 */
+	setup_encryption_key(password, false, pwd_len);
+
+	/* Print the key as hex; the cast avoids sign extension of a char. */
+	for (i = 0; i < ENCRYPTION_KEY_LENGTH; i++)
+		printf("%.2x", (unsigned char) encryption_key[i]);
+	printf("\n");
+#else
+	ENCRYPTION_NOT_SUPPORTED_MSG;
+#endif							/* USE_ENCRYPTION */
+	return 0;
+}
diff --git a/src/bin/pg_resetwal/Makefile b/src/bin/pg_resetwal/Makefile
index 09ede1efd2..5d31ac5baa 100644
--- a/src/bin/pg_resetwal/Makefile
+++ b/src/bin/pg_resetwal/Makefile
@@ -15,15 +15,20 @@ subdir = src/bin/pg_resetwal
 top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
+override CPPFLAGS := -DFRONTEND $(CPPFLAGS)
+
 LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils
 
-OBJS= pg_resetwal.o $(WIN32RES)
+OBJS= pg_resetwal.o encryption.o $(WIN32RES)
 
 all: pg_resetwal
 
 pg_resetwal: $(OBJS) | submake-libpgport submake-libpgfeutils
 	$(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
 
+encryption.c: % : $(top_srcdir)/src/backend/storage/file/%
+	rm -f $@ && $(LN_S) $< .
+
 install: all installdirs
 	$(INSTALL_PROGRAM) pg_resetwal$(X) '$(DESTDIR)$(bindir)/pg_resetwal$(X)'
 
@@ -34,7 +39,7 @@ uninstall:
 	rm -f '$(DESTDIR)$(bindir)/pg_resetwal$(X)'
 
 clean distclean maintainer-clean:
-	rm -f pg_resetwal$(X) $(OBJS)
+	rm -f pg_resetwal$(X) $(OBJS) encryption.c
 	rm -rf tmp_check
 
 check:
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c
index 82a8ec993d..90bb3b63d5 100644
--- a/src/bin/pg_resetwal/pg_resetwal.c
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -54,11 +54,11 @@
 #include "common/file_perm.h"
 #include "common/restricted_token.h"
 #include "fe_utils/logging.h"
+#include "storage/encryption.h"
 #include "storage/large_object.h"
 #include "pg_getopt.h"
 #include "getopt_long.h"
 
-
 static ControlFileData ControlFile; /* pg_control values */
 static XLogSegNo newXlogSegNo;	/* new XLOG segment # */
 static bool guessed = false;	/* T if we had to guess at any values */
@@ -135,7 +135,7 @@ main(int argc, char *argv[])
 	}
 
 
-	while ((c = getopt_long(argc, argv, "c:D:e:fl:m:no:O:x:", long_options, NULL)) != -1)
+	while ((c = getopt_long(argc, argv, "c:D:e:fK:l:m:no:O:x:", long_options, NULL)) != -1)
 	{
 		switch (c)
 		{
@@ -277,6 +277,12 @@ main(int argc, char *argv[])
 				}
 				break;
 
+#ifdef	USE_ENCRYPTION
+			case 'K':			/* pg_strdup() never returns NULL */
+				encryption_key_command = pg_strdup(optarg);
+				break;
+#endif							/* USE_ENCRYPTION */
+
 			case 'l':
 				if (strspn(optarg, "01234567890ABCDEFabcdef") != XLOG_FNAME_LEN)
 				{
@@ -408,6 +414,16 @@ main(int argc, char *argv[])
 		XLogFromFileName(log_fname, &minXlogTli, &minXlogSegNo, WalSegSz);
 
 	/*
+	 * If the data is encrypted, we also might need to encrypt the XLOG record
+	 * below.
+	 */
+	if (ControlFile.data_cipher > PG_CIPHER_NONE && !noupdate)
+	{
+		setup_encryption(false, NULL);
+		data_encrypted = true;
+	}
+
+	/*
 	 * Also look at existing segment files to set up newXlogSegNo
 	 */
 	FindEndOfXLOG();
@@ -823,6 +839,12 @@ PrintControlValues(bool guessed)
 		   (ControlFile.float8ByVal ? _("by value") : _("by reference")));
 	printf(_("Data page checksum version:           %u\n"),
 		   ControlFile.data_checksum_version);
+	if (ControlFile.data_cipher > PG_CIPHER_NONE)
+		printf(_("Data encryption fingerprint:          %08X%08X%08X%08X\n"),
+			   htonl(((uint32 *) ControlFile.encryption_verification)[0]),
+			   htonl(((uint32 *) ControlFile.encryption_verification)[1]),
+			   htonl(((uint32 *) ControlFile.encryption_verification)[2]),
+			   htonl(((uint32 *) ControlFile.encryption_verification)[3]));
 }
 
 
@@ -1173,6 +1195,14 @@ WriteEmptyXLOG(void)
 	FIN_CRC32C(crc);
 	record->xl_crc = crc;
 
+	if (data_encrypted)
+	{
+		char		tweak[TWEAK_SIZE];
+
+		XLogEncryptionTweak(tweak, page->xlp_tli, newXlogSegNo, 0);
+		encrypt_block(buffer.data, buffer.data, XLOG_BLCKSZ, tweak, true);
+	}
+
 	/* Write the first page */
 	XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID,
 				 newXlogSegNo, WalSegSz);
@@ -1233,6 +1263,10 @@ usage(void)
 	printf(_(" [-D, --pgdata=]DATADIR          data directory\n"));
 	printf(_("  -e, --epoch=XIDEPOCH           set next transaction ID epoch\n"));
 	printf(_("  -f, --force                    force update to be done\n"));
+#ifdef	USE_ENCRYPTION
+	printf(_("  -K, --encryption-key-command=COMMAND\n"
+			 "                                 command that returns encryption key\n"));
+#endif							/* USE_ENCRYPTION */
 	printf(_("  -l, --next-wal-file=WALFILE    set minimum starting location for new WAL\n"));
 	printf(_("  -m, --multixact-ids=MXID,MXID  set next and oldest multitransaction ID\n"));
 	printf(_("  -n, --dry-run                  no update, just show what would be done\n"));
diff --git a/src/bin/pg_rewind/Makefile b/src/bin/pg_rewind/Makefile
index 019e19986e..8b7b3fe750 100644
--- a/src/bin/pg_rewind/Makefile
+++ b/src/bin/pg_rewind/Makefile
@@ -20,7 +20,7 @@ LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
 
 OBJS	= pg_rewind.o parsexlog.o xlogreader.o datapagemap.o timeline.o \
 	fetch.o file_ops.o copy_fetch.o libpq_fetch.o filemap.o logging.o \
-	$(WIN32RES)
+	encryption.o $(WIN32RES)
 
 EXTRA_CLEAN = xlogreader.c
 
@@ -32,6 +32,9 @@ pg_rewind: $(OBJS) | submake-libpq submake-libpgport submake-libpgfeutils
 xlogreader.c: % : $(top_srcdir)/src/backend/access/transam/%
 	rm -f $@ && $(LN_S) $< .
 
+encryption.c: % : $(top_srcdir)/src/backend/storage/file/%
+	rm -f $@ && $(LN_S) $< .
+
 install: all installdirs
 	$(INSTALL_PROGRAM) pg_rewind$(X) '$(DESTDIR)$(bindir)/pg_rewind$(X)'
 
@@ -42,7 +45,7 @@ uninstall:
 	rm -f '$(DESTDIR)$(bindir)/pg_rewind$(X)'
 
 clean distclean maintainer-clean:
-	rm -f pg_rewind$(X) $(OBJS) xlogreader.c
+	rm -f pg_rewind$(X) $(OBJS) xlogreader.c encryption.c
 	rm -rf tmp_check
 
 check:
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index 04a3535dfb..b185895368 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -23,7 +23,7 @@
 #include "catalog/pg_control.h"
 #include "catalog/storage_xlog.h"
 #include "commands/dbcommands_xlog.h"
-
+#include "storage/encryption.h"
 
 /*
  * RmgrNames is an array of resource manager names, to make error messages
@@ -321,6 +321,15 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 		return -1;
 	}
 
+	if (data_encrypted)
+	{
+		char		tweak[TWEAK_SIZE];
+
+		XLogEncryptionTweak(tweak, targetHistory[private->tliIndex].tli,
+							xlogreadsegno, targetPageOff);
+		decrypt_block(readBuf, readBuf, XLOG_BLCKSZ, tweak, true);
+	}
+
 	Assert(targetSegNo == xlogreadsegno);
 
 	*pageTLI = targetHistory[private->tliIndex].tli;
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index e552c84964..8528c933c0 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -31,6 +31,7 @@
 #include "fe_utils/logging.h"
 #include "getopt_long.h"
 #include "storage/bufpage.h"
+#include "storage/encryption.h"
 
 static void usage(const char *progname);
 
@@ -72,6 +73,10 @@ usage(const char *progname)
 	printf(_("  -D, --target-pgdata=DIRECTORY  existing data directory to modify\n"));
 	printf(_("      --source-pgdata=DIRECTORY  source data directory to synchronize with\n"));
 	printf(_("      --source-server=CONNSTR    source server to synchronize with\n"));
+#ifdef	USE_ENCRYPTION
+	printf(_("  -K, --encryption-key-command=COMMAND\n"
+			 "                                 command that returns encryption key\n"));
+#endif							/* USE_ENCRYPTION */
 	printf(_("  -n, --dry-run                  stop before modifying anything\n"));
 	printf(_("  -N, --no-sync                  do not wait for changes to be written\n"));
 	printf(_("                                 safely to disk\n"));
@@ -96,6 +101,9 @@ main(int argc, char **argv)
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"debug", no_argument, NULL, 3},
+#ifdef	USE_ENCRYPTION
+		{"encryption-key-command", required_argument, NULL, 'K'},
+#endif							/* USE_ENCRYPTION */
 		{NULL, 0, NULL, 0}
 	};
 	int			option_index;
@@ -131,7 +139,7 @@ main(int argc, char **argv)
 		}
 	}
 
-	while ((c = getopt_long(argc, argv, "D:nNP", long_options, &option_index)) != -1)
+	while ((c = getopt_long(argc, argv, "D:K:nNP", long_options, &option_index)) != -1)
 	{
 		switch (c)
 		{
@@ -166,6 +174,12 @@ main(int argc, char **argv)
 			case 2:				/* --source-server */
 				connstr_source = pg_strdup(optarg);
 				break;
+#ifdef	USE_ENCRYPTION
+			case 'K':			/* also --encryption-key-command */
+				/* pg_strdup() exits on out-of-memory instead of returning NULL */
+				encryption_key_command = pg_strdup(optarg);
+				break;
+#endif							/* USE_ENCRYPTION */
 		}
 	}
 
@@ -245,6 +259,21 @@ main(int argc, char **argv)
 	sanityChecks();
 
 	/*
+	 * Setup encryption if it's obvious that we'll have to deal with encrypted
+	 * XLOG.
+	 */
+	if (ControlFile_target.data_cipher > PG_CIPHER_NONE)
+	{
+		/*
+		 * It should not matter whether we pass the source or target data
+		 * directory. It should have been checked earlier that both clusters
+		 * are encrypted using the same key.
+		 */
+		setup_encryption(false, datadir_source);
+		data_encrypted = true;
+	}
+
+	/*
 	 * If both clusters are already on the same timeline, there's nothing to
 	 * do.
 	 */
@@ -443,6 +472,24 @@ sanityChecks(void)
 		ControlFile_source.state != DB_SHUTDOWNED &&
 		ControlFile_source.state != DB_SHUTDOWNED_IN_RECOVERY)
 		pg_fatal("source data directory must be shut down cleanly");
+
+	/*
+	 * Since slave receives XLOG stream encrypted by master, handling
+	 * differently encrypted clusters is not the typical use case for
+	 * pg_rewind. Yet we should check the encryption.
+	 */
+	if (ControlFile_source.data_cipher > PG_CIPHER_NONE ||
+		ControlFile_target.data_cipher > PG_CIPHER_NONE)
+	{
+		if (ControlFile_source.data_cipher !=
+			ControlFile_target.data_cipher)
+			pg_fatal("source and target server must be both unencrypted or both encrypted");
+
+		if (memcmp(ControlFile_source.encryption_verification,
+				   ControlFile_target.encryption_verification,
+				   ENCRYPTION_SAMPLE_SIZE))
+			pg_fatal("both source and target server must use the same encryption key");
+	}
 }
 
 /*
diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile
index 5a18948425..99e7846b4d 100644
--- a/src/bin/pg_upgrade/Makefile
+++ b/src/bin/pg_upgrade/Makefile
@@ -9,9 +9,9 @@ include $(top_builddir)/src/Makefile.global
 
 OBJS = check.o controldata.o dump.o exec.o file.o function.o info.o \
        option.o parallel.o pg_upgrade.o relfilenode.o server.o \
-       tablespace.o util.o version.o $(WIN32RES)
+       tablespace.o util.o version.o encryption.o $(WIN32RES)
 
-override CPPFLAGS := -DDLSUFFIX=\"$(DLSUFFIX)\" -I$(srcdir) -I$(libpq_srcdir) $(CPPFLAGS)
+override CPPFLAGS := -DDLSUFFIX=\"$(DLSUFFIX)\" -I$(srcdir) -I$(libpq_srcdir) -DFRONTEND $(CPPFLAGS)
 LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
 
 ifdef NO_TEMP_INSTALL
@@ -29,6 +29,9 @@ all: pg_upgrade
 pg_upgrade: $(OBJS) | submake-libpq submake-libpgport submake-libpgfeutils
 	$(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
 
+encryption.c: % : $(top_srcdir)/src/backend/storage/file/%
+	rm -f $@ && $(LN_S) $< .
+
 install: all installdirs
 	$(INSTALL_PROGRAM) pg_upgrade$(X) '$(DESTDIR)$(bindir)/pg_upgrade$(X)'
 
@@ -43,7 +46,8 @@ clean distclean maintainer-clean:
 	rm -rf analyze_new_cluster.sh delete_old_cluster.sh log/ tmp_check/ \
 	       loadable_libraries.txt reindex_hash.sql \
 	       pg_upgrade_dump_globals.sql \
-	       pg_upgrade_dump_*.custom pg_upgrade_*.log
+	       pg_upgrade_dump_*.custom pg_upgrade_*.log \
+		encryption.c
 
 check: test.sh all
 	MAKE=$(MAKE) bindir="$(tbindir)" libdir="$(tlibdir)" EXTRA_REGRESS_OPTS="$(EXTRA_REGRESS_OPTS)" $(SHELL) $< $(DOINST)
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index fc5aa7010f..11f2d036f9 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -12,11 +12,13 @@
 #include "catalog/pg_authid_d.h"
 #include "fe_utils/string_utils.h"
 #include "mb/pg_wchar.h"
+#include "storage/encryption.h"
 #include "pg_upgrade.h"
 
 
 static void check_new_cluster_is_empty(void);
 static void check_databases_are_compatible(void);
+static void get_encryption_key_command(ClusterInfo *cluster);
 static void check_locale_and_encoding(DbInfo *olddb, DbInfo *newdb);
 static bool equivalent_locale(int category, const char *loca, const char *locb);
 static void check_is_install_user(ClusterInfo *cluster);
@@ -307,6 +309,92 @@ check_cluster_compatibility(bool live_check)
 				 "the old and new port numbers must be different.\n");
 }
 
+/*
+ * check_encryption_keys()
+ *
+ * Verify that the old and the new cluster are encrypted using the same key.
+ *
+ * Each cluster is started and stopped once so that its
+ * encryption_key_command setting can be retrieved; the command is then run
+ * to obtain the key. XXX Should we get the value from postgresql.conf
+ * instead (note that it can be located outside the data directory)?
+ */
+void
+check_encryption_keys(void)
+{
+	bool		got_key;
+	char	   *key_old;
+	size_t		key_old_len;
+	char	   *key_new;
+	size_t		key_new_len;
+
+	/* Fetch the key of the old cluster. */
+	start_postmaster(&old_cluster, true);
+	get_encryption_key_command(&old_cluster);
+	stop_postmaster(false);
+	key_old = run_encryption_key_command(old_cluster.encryption_key_command,
+										 &got_key, &key_old_len);
+
+	/*
+	 * pg_upgrade only accepts encryption key since different keys should be
+	 * derived from the same password on different systems anyway.
+	 */
+	if (!got_key)
+		encryption_error(true,
+						 "The old cluster uses encryption password instead of encryption key");
+
+	/* Fetch the key of the new cluster, under the same restriction. */
+	start_postmaster(&new_cluster, true);
+	get_encryption_key_command(&new_cluster);
+	stop_postmaster(false);
+	key_new = run_encryption_key_command(new_cluster.encryption_key_command,
+										 &got_key, &key_new_len);
+	if (!got_key)
+		encryption_error(true,
+						 "The new cluster uses encryption password instead of encryption key");
+
+	/*
+	 * The keys must be identical, otherwise the new cluster could not use
+	 * the files that we do not re-encrypt.
+	 */
+	if (key_old_len != key_new_len ||
+		memcmp(key_old, key_new, key_new_len) != 0)
+		encryption_error(true, "The clusters use different encryption key");
+}
+
+/*
+ *	get_encryption_key_command()
+ *
+ *	Connect to the running cluster and store a copy of its
+ *	encryption_key_command setting in cluster->encryption_key_command. The
+ *	field is left untouched if the query returns no row.
+ */
+static void
+get_encryption_key_command(ClusterInfo *cluster)
+{
+	PGconn	   *conn;
+	PGresult   *result;
+
+	conn = connectToServer(cluster, "template1");
+	prep_status("Retrieving encryption_key_command");
+
+	result = executeQueryOrDie(conn,
+							   "SELECT setting "
+							   "FROM pg_catalog.pg_settings "
+							   "WHERE name = 'encryption_key_command'");
+	if (PQntuples(result) > 0)
+	{
+		char	   *value = PQgetvalue(result, 0, PQfnumber(result, "setting"));
+
+		if (value != NULL)
+			cluster->encryption_key_command = pg_strdup(value);
+		else
+			cluster->encryption_key_command = NULL;
+	}
+	PQclear(result);
+	PQfinish(conn);
+	check_ok();
+}
 
 /*
  * check_locale_and_encoding()
diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c
index 163a3f8c9e..b04d7b9b26 100644
--- a/src/bin/pg_upgrade/controldata.c
+++ b/src/bin/pg_upgrade/controldata.c
@@ -8,6 +8,7 @@
  */
 
 #include "postgres_fe.h"
+#include "catalog/pg_control.h"
 
 #include "pg_upgrade.h"
 
@@ -486,6 +487,33 @@ get_control_data(ClusterInfo *cluster, bool live_check)
 			cluster->controldata.data_checksum_version = str2uint(p);
 			got_data_checksum_version = true;
 		}
+		else if ((p = strstr(bufin, "encryption fingerprint")) != NULL)
+		{
+			int			i;
+
+			/* pg_controldata prints this line only for encrypted clusters */
+			cluster->controldata.data_encrypted = true;
+
+			p = strchr(p, ':');
+			if (p == NULL || strlen(p) <= 1)
+				pg_fatal("%d: controldata retrieval problem\n", __LINE__);
+
+			/* Skip the colon and any whitespace after it */
+			p = strpbrk(p, "0123456789ABCDEF");
+			if (p == NULL || strlen(p) <= 1)
+				pg_fatal("%d: controldata retrieval problem\n", __LINE__);
+
+			/*
+			 * Make sure it looks like a valid fingerprint, i.e. two uppercase
+			 * hexadecimal digits per byte.
+			 */
+			if (strspn(p, "0123456789ABCDEF") != 2 * ENCRYPTION_SAMPLE_SIZE)
+				pg_fatal("%d: controldata retrieval problem\n", __LINE__);
+
+			for (i = 0; i < ENCRYPTION_SAMPLE_SIZE; i++)
+				sscanf(p + 2 * i, "%2hhx",
+					   cluster->controldata.encryption_verification + i);
+		}
 	}
 
 	pclose(output);
@@ -670,6 +698,18 @@ check_control_data(ControlData *oldctrl,
 		pg_fatal("old cluster uses data checksums but the new one does not\n");
 	else if (oldctrl->data_checksum_version != newctrl->data_checksum_version)
 		pg_fatal("old and new cluster pg_controldata checksum versions do not match\n");
+
+	if (oldctrl->data_encrypted && !newctrl->data_encrypted)
+		pg_fatal("old cluster is encrypted, but the new one is not\n");
+	else if (!oldctrl->data_encrypted && newctrl->data_encrypted)
+		pg_fatal("old cluster is not encrypted, but the new one is\n");
+	else if (oldctrl->data_encrypted)
+	{
+		if (memcmp(oldctrl->encryption_verification,
+				   newctrl->encryption_verification,
+				   ENCRYPTION_SAMPLE_SIZE) != 0)
+			pg_fatal("encryption of the new cluster is not compatible with encryption of the old one\n");
+	}
 }
 
 
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c
index c7fed24df9..6443b65261 100644
--- a/src/bin/pg_upgrade/file.c
+++ b/src/bin/pg_upgrade/file.c
@@ -13,6 +13,7 @@
 #include "common/file_perm.h"
 #include "pg_upgrade.h"
 #include "storage/bufpage.h"
+#include "storage/encryption.h"
 #include "storage/checksum.h"
 #include "storage/checksum_impl.h"
 
@@ -78,15 +79,20 @@ cloneFile(const char *src, const char *dst,
  *
  * Copies a relation file from src to dst.
  * schemaName/relName are relation's SQL name (used for error messages only).
+ *
+ * Re-encrypt each block in order to handle change of relfilenode.
  */
 void
-copyFile(const char *src, const char *dst,
-		 const char *schemaName, const char *relName)
+copyFile(const char *src, RelFileNode *src_relnode,
+		 const char *dst, RelFileNode *dst_relnode,
+		 ForkNumber forknum, int segno, const char *schemaName,
+		 const char *relName)
 {
 #ifndef WIN32
 	int			src_fd;
 	int			dest_fd;
 	char	   *buffer;
+	BlockNumber block_num = segno * RELSEG_SIZE;
 
 	if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
 		pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n",
@@ -102,20 +108,55 @@ copyFile(const char *src, const char *dst,
 
 	buffer = (char *) pg_malloc(COPY_BUF_SIZE);
 
-	/* perform data copying i.e read src source, write to destination */
+	/*
+	 * Perform data copying i.e read source, write to destination. As the file
+	 * can be encrypted, we only handle whole pages.
+	 */
 	while (true)
 	{
-		ssize_t		nbytes = read(src_fd, buffer, COPY_BUF_SIZE);
+		ssize_t		nbytes_total = 0;
+
+		while (nbytes_total < COPY_BUF_SIZE)
+		{
+			ssize_t		nbytes;
+
+			nbytes = read(src_fd, buffer + nbytes_total, COPY_BUF_SIZE - nbytes_total);
+			nbytes_total += nbytes;
+
+			if (nbytes < 0)
+				pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
+						 schemaName, relName, src, strerror(errno));
+
+			if (nbytes == 0)
+				break;
+		}
 
-		if (nbytes < 0)
+		/*
+		 * The buffer is not necessarily full but the data must end at page
+		 * boundary.
+		 */
+		if ((nbytes_total % BLCKSZ) != 0)
 			pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
 					 schemaName, relName, src, strerror(errno));
 
-		if (nbytes == 0)
+		if (nbytes_total == 0)
 			break;
 
+		/* Re-encrypt the block(s) if copying changes encryption tweak. */
+		if (encryption_enabled &&
+			(src_relnode->spcNode != dst_relnode->spcNode ||
+			 src_relnode->dbNode != dst_relnode->dbNode ||
+			 src_relnode->relNode != dst_relnode->relNode))
+			block_num = ReencryptBlock(buffer,
+									   nbytes_total / BLCKSZ,
+									   src_relnode,
+									   dst_relnode,
+									   forknum,
+									   forknum,
+									   block_num);
+
 		errno = 0;
-		if (write(dest_fd, buffer, nbytes) != nbytes)
+		if (write(dest_fd, buffer, nbytes_total) != nbytes_total)
 		{
 			/* if write didn't set errno, assume problem is no disk space */
 			if (errno == 0)
@@ -130,7 +171,7 @@ copyFile(const char *src, const char *dst,
 	close(dest_fd);
 
 #else							/* WIN32 */
-
+	/* TODO Re-encrypt if needed. */
 	if (CopyFile(src, dst, true) == 0)
 	{
 		_dosmaperr(GetLastError());
diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c
index 902bfc647e..4500a14a26 100644
--- a/src/bin/pg_upgrade/info.c
+++ b/src/bin/pg_upgrade/info.c
@@ -185,6 +185,10 @@ create_rel_filename_map(const char *old_data, const char *new_data,
 		map->old_tablespace = old_rel->tablespace;
 		map->old_tablespace_suffix = old_cluster.tablespace_suffix;
 	}
+	if (old_rel->tablespace_oid != InvalidOid)
+		map->old_tablespace_oid = old_rel->tablespace_oid;
+	else
+		map->old_tablespace_oid = old_db->db_tablespace_oid;
 
 	/* Do the same for new tablespaces */
 	if (strlen(new_rel->tablespace) == 0)
@@ -197,6 +201,10 @@ create_rel_filename_map(const char *old_data, const char *new_data,
 		map->new_tablespace = new_rel->tablespace;
 		map->new_tablespace_suffix = new_cluster.tablespace_suffix;
 	}
+	if (new_rel->tablespace_oid != InvalidOid)
+		map->new_tablespace_oid = new_rel->tablespace_oid;
+	else
+		map->new_tablespace_oid = new_db->db_tablespace_oid;
 
 	map->old_db_oid = old_db->db_oid;
 	map->new_db_oid = new_db->db_oid;
@@ -351,12 +359,13 @@ get_db_infos(ClusterInfo *cluster)
 				i_encoding,
 				i_datcollate,
 				i_datctype,
-				i_spclocation;
+				i_spclocation,
+				i_tablespace;
 	char		query[QUERY_ALLOC];
 
 	snprintf(query, sizeof(query),
 			 "SELECT d.oid, d.datname, d.encoding, d.datcollate, d.datctype, "
-			 "%s AS spclocation "
+			 "%s AS spclocation, d.dattablespace "
 			 "FROM pg_catalog.pg_database d "
 			 " LEFT OUTER JOIN pg_catalog.pg_tablespace t "
 			 " ON d.dattablespace = t.oid "
@@ -375,6 +384,7 @@ get_db_infos(ClusterInfo *cluster)
 	i_datcollate = PQfnumber(res, "datcollate");
 	i_datctype = PQfnumber(res, "datctype");
 	i_spclocation = PQfnumber(res, "spclocation");
+	i_tablespace = PQfnumber(res, "dattablespace");
 
 	ntups = PQntuples(res);
 	dbinfos = (DbInfo *) pg_malloc(sizeof(DbInfo) * ntups);
@@ -388,6 +398,7 @@ get_db_infos(ClusterInfo *cluster)
 		dbinfos[tupnum].db_ctype = pg_strdup(PQgetvalue(res, tupnum, i_datctype));
 		snprintf(dbinfos[tupnum].db_tablespace, sizeof(dbinfos[tupnum].db_tablespace), "%s",
 				 PQgetvalue(res, tupnum, i_spclocation));
+		dbinfos[tupnum].db_tablespace_oid = atooid(PQgetvalue(res, tupnum, i_tablespace));
 	}
 	PQclear(res);
 
@@ -571,7 +582,8 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 		curr->tblsp_alloc = false;
 
 		/* Is the tablespace oid non-default? */
-		if (atooid(PQgetvalue(res, relnum, i_reltablespace)) != 0)
+		curr->tablespace_oid = atooid(PQgetvalue(res, relnum, i_reltablespace));
+		if (curr->tablespace_oid != 0)
 		{
 			/*
 			 * The tablespace location might be "", meaning the cluster
@@ -655,9 +667,10 @@ print_rel_infos(RelInfoArr *rel_arr)
 	int			relnum;
 
 	for (relnum = 0; relnum < rel_arr->nrels; relnum++)
-		pg_log(PG_VERBOSE, "relname: %s.%s: reloid: %u reltblspace: %s\n",
+		pg_log(PG_VERBOSE, "relname: %s.%s: reloid: %u reltblspace: %s reltblspaceoid: %u\n",
 			   rel_arr->rels[relnum].nspname,
 			   rel_arr->rels[relnum].relname,
 			   rel_arr->rels[relnum].reloid,
-			   rel_arr->rels[relnum].tablespace);
+			   rel_arr->rels[relnum].tablespace,
+			   rel_arr->rels[relnum].tablespace_oid);
 }
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 0b304bbd56..402e4dff3c 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -36,13 +36,14 @@
 
 
 #include "postgres_fe.h"
-
 #include "pg_upgrade.h"
+
 #include "catalog/pg_class_d.h"
 #include "common/file_perm.h"
 #include "common/restricted_token.h"
 #include "fe_utils/logging.h"
 #include "fe_utils/string_utils.h"
+#include "storage/encryption.h"
 
 #ifdef HAVE_LANGINFO_H
 #include <langinfo.h>
@@ -71,6 +72,7 @@ char	   *output_files[] = {
 	NULL
 };
 
+bool		encryption_enabled = false;
 
 int
 main(int argc, char **argv)
@@ -78,6 +80,7 @@ main(int argc, char **argv)
 	char	   *analyze_script_file_name = NULL;
 	char	   *deletion_script_file_name = NULL;
 	bool		live_check = false;
+	char		keycmd_opt_str[MAX_STRING];
 
 	pg_logging_init(argv[0]);
 	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_upgrade"));
@@ -103,6 +106,28 @@ main(int argc, char **argv)
 
 	check_cluster_compatibility(live_check);
 
+	/*
+	 * If encryption is in place, we expect that some files will need
+	 * re-encryption due to change of their RelFileNode, so neither link nor
+	 * clone helps. (We might copy files that need re-encryption and link the
+	 * others, but not sure it's worth the effort.)
+	 *
+	 * By now we've checked that the clusters are both encrypted or both
+	 * unencrypted, so check data_encrypted of any one of them.
+	 */
+	if (old_cluster.controldata.data_encrypted)
+	{
+		if (user_opts.transfer_mode == TRANSFER_MODE_LINK)
+			pg_fatal("link mode cannot be used for encrypted instance\n");
+		else if (user_opts.transfer_mode == TRANSFER_MODE_CLONE)
+			pg_fatal("clone mode cannot be used for encrypted instance\n");
+
+		/*
+		 * Encryption key will be needed for file re-encryption.
+		 */
+		check_encryption_keys();
+	}
+
 	/* Set mask based on PGDATA permissions */
 	if (!GetDataDirectoryCreatePerm(new_cluster.pgdata))
 	{
@@ -133,7 +158,15 @@ main(int argc, char **argv)
 
 	/*
 	 * Destructive Changes to New Cluster
+	 *
+	 * We'll deal with encrypted files, so prepare for it.
 	 */
+	if (new_cluster.encryption_key_command)
+	{
+		encryption_key_command = pg_strdup(new_cluster.encryption_key_command);
+		setup_encryption(false, NULL);
+		encryption_enabled = true;
+	}
 
 	copy_xact_xlog_xid();
 
@@ -160,6 +193,12 @@ main(int argc, char **argv)
 	transfer_all_new_tablespaces(&old_cluster.dbarr, &new_cluster.dbarr,
 								 old_cluster.pgdata, new_cluster.pgdata);
 
+	/* Quote the command so that it reaches pg_resetwal as a single argument */
+	keycmd_opt_str[0] = '\0';
+	if (new_cluster.encryption_key_command)
+		snprintf(keycmd_opt_str, sizeof(keycmd_opt_str),
+				 " -K \"%s\"", new_cluster.encryption_key_command);
+
 	/*
 	 * Assuming OIDs are only used in system tables, there is no need to
 	 * restore the OID counter because we have not transferred any OIDs from
@@ -168,8 +207,10 @@ main(int argc, char **argv)
 	 */
 	prep_status("Setting next OID for new cluster");
 	exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-			  "\"%s/pg_resetwal\" -o %u \"%s\"",
-			  new_cluster.bindir, old_cluster.controldata.chkpnt_nxtoid,
+			  "\"%s/pg_resetwal\"%s -o %u \"%s\"",
+			  new_cluster.bindir,
+			  keycmd_opt_str,
+			  old_cluster.controldata.chkpnt_nxtoid,
 			  new_cluster.pgdata);
 	check_ok();
 
@@ -461,6 +502,8 @@ copy_subdir_files(const char *old_subdir, const char *new_subdir)
 static void
 copy_xact_xlog_xid(void)
 {
+	char		keycmd_opt_str[MAX_STRING];
+
 	/*
 	 * Copy old commit logs to new data dir. pg_clog has been renamed to
 	 * pg_xact in post-10 clusters.
@@ -470,20 +513,31 @@ copy_xact_xlog_xid(void)
 					  GET_MAJOR_VERSION(new_cluster.major_version) < 1000 ?
 					  "pg_clog" : "pg_xact");
 
+	/* Quote the command so that it reaches pg_resetwal as a single argument */
+	keycmd_opt_str[0] = '\0';
+	if (new_cluster.encryption_key_command)
+		snprintf(keycmd_opt_str, sizeof(keycmd_opt_str),
+				 " -K \"%s\"", new_cluster.encryption_key_command);
+
 	/* set the next transaction id and epoch of the new cluster */
 	prep_status("Setting next transaction ID and epoch for new cluster");
 	exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-			  "\"%s/pg_resetwal\" -f -x %u \"%s\"",
-			  new_cluster.bindir, old_cluster.controldata.chkpnt_nxtxid,
+			  "\"%s/pg_resetwal\"%s -f -x %u \"%s\"",
+			  new_cluster.bindir,
+			  keycmd_opt_str,
+			  old_cluster.controldata.chkpnt_nxtxid,
 			  new_cluster.pgdata);
 	exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-			  "\"%s/pg_resetwal\" -f -e %u \"%s\"",
-			  new_cluster.bindir, old_cluster.controldata.chkpnt_nxtepoch,
+			  "\"%s/pg_resetwal\"%s -f -e %u \"%s\"",
+			  new_cluster.bindir,
+			  keycmd_opt_str,
+			  old_cluster.controldata.chkpnt_nxtepoch,
 			  new_cluster.pgdata);
 	/* must reset commit timestamp limits also */
 	exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-			  "\"%s/pg_resetwal\" -f -c %u,%u \"%s\"",
+			  "\"%s/pg_resetwal\"%s -f -c %u,%u \"%s\"",
 			  new_cluster.bindir,
+			  keycmd_opt_str,
 			  old_cluster.controldata.chkpnt_nxtxid,
 			  old_cluster.controldata.chkpnt_nxtxid,
 			  new_cluster.pgdata);
@@ -508,8 +562,9 @@ copy_xact_xlog_xid(void)
 		 * counters here and the oldest multi present on system.
 		 */
 		exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-				  "\"%s/pg_resetwal\" -O %u -m %u,%u \"%s\"",
+				  "\"%s/pg_resetwal\"%s -O %u -m %u,%u \"%s\"",
 				  new_cluster.bindir,
+				  keycmd_opt_str,
 				  old_cluster.controldata.chkpnt_nxtmxoff,
 				  old_cluster.controldata.chkpnt_nxtmulti,
 				  old_cluster.controldata.chkpnt_oldstMulti,
@@ -536,8 +591,9 @@ copy_xact_xlog_xid(void)
 		 * next=MaxMultiXactId, but multixact.c can cope with that just fine.
 		 */
 		exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-				  "\"%s/pg_resetwal\" -m %u,%u \"%s\"",
+				  "\"%s/pg_resetwal\"%s -m %u,%u \"%s\"",
 				  new_cluster.bindir,
+				  keycmd_opt_str,
 				  old_cluster.controldata.chkpnt_nxtmulti + 1,
 				  old_cluster.controldata.chkpnt_nxtmulti,
 				  new_cluster.pgdata);
@@ -548,7 +604,8 @@ copy_xact_xlog_xid(void)
 	prep_status("Resetting WAL archives");
 	exec_prog(UTILITY_LOG_FILE, NULL, true, true,
 	/* use timeline 1 to match controldata and no WAL history file */
-			  "\"%s/pg_resetwal\" -l 00000001%s \"%s\"", new_cluster.bindir,
+			  "\"%s/pg_resetwal\"%s -l 00000001%s \"%s\"", new_cluster.bindir,
+			  keycmd_opt_str,
 			  old_cluster.controldata.nextxlogfile + 8,
 			  new_cluster.pgdata);
 	check_ok();
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index baeb8ff0f8..757047360b 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -11,6 +11,8 @@
 #include <sys/time.h>
 
 #include "libpq-fe.h"
+#include "catalog/pg_control.h"
+#include "storage/relfilenode.h"
 
 /* Use port in the private/dynamic port number range */
 #define DEF_PGUPORT			50432
@@ -145,6 +147,7 @@ typedef struct
 	Oid			indtable;		/* if index, OID of its table, else 0 */
 	Oid			toastheap;		/* if toast table, OID of base table, else 0 */
 	char	   *tablespace;		/* tablespace path; "" for cluster default */
+	Oid			tablespace_oid; /* tablespace OID */
 	bool		nsp_alloc;		/* should nspname be freed? */
 	bool		tblsp_alloc;	/* should tablespace be freed? */
 	int32		relpages;		/* # of pages -- see pg_class.h */
@@ -164,6 +167,8 @@ typedef struct
 {
 	const char *old_tablespace;
 	const char *new_tablespace;
+	Oid			old_tablespace_oid;
+	Oid			new_tablespace_oid;
 	const char *old_tablespace_suffix;
 	const char *new_tablespace_suffix;
 	Oid			old_db_oid;
@@ -193,6 +198,7 @@ typedef struct
 	char	   *db_name;		/* database name */
 	char		db_tablespace[MAXPGPATH];	/* database default tablespace
 											 * path */
+	Oid			db_tablespace_oid;
 	char	   *db_collate;
 	char	   *db_ctype;
 	int			db_encoding;
@@ -233,6 +239,8 @@ typedef struct
 	bool		date_is_int;
 	bool		float8_pass_by_value;
 	bool		data_checksum_version;
+	bool		data_encrypted;
+	uint8		encryption_verification[ENCRYPTION_SAMPLE_SIZE];
 } ControlData;
 
 /*
@@ -282,6 +290,8 @@ typedef struct
 	char		major_version_str[64];	/* string PG_VERSION of cluster */
 	uint32		bin_version;	/* version returned from pg_ctl */
 	const char *tablespace_suffix;	/* directory specification */
+	const char *encryption_key_command; /* command to retrieve encryption key
+										 * or passphrase */
 } ClusterInfo;
 
 
@@ -339,6 +349,7 @@ extern UserOpts user_opts;
 extern ClusterInfo old_cluster,
 			new_cluster;
 extern OSInfo os_info;
+extern bool encryption_enabled;
 
 
 /* check.c */
@@ -352,6 +363,7 @@ void output_completion_banner(char *analyze_script_file_name,
 						 char *deletion_script_file_name);
 void		check_cluster_versions(void);
 void		check_cluster_compatibility(bool live_check);
+void		check_encryption_keys(void);
 void		create_script_for_old_cluster_deletion(char **deletion_script_file_name);
 void		create_script_for_cluster_analyze(char **analyze_script_file_name);
 
@@ -382,8 +394,10 @@ bool		pid_lock_file_exists(const char *datadir);
 
 void cloneFile(const char *src, const char *dst,
 		  const char *schemaName, const char *relName);
-void copyFile(const char *src, const char *dst,
-		 const char *schemaName, const char *relName);
+void copyFile(const char *src, RelFileNode *src_relnode,
+		 const char *dst, RelFileNode *dst_relnode,
+		 ForkNumber forknum, int segno, const char *schemaName,
+		 const char *relName);
 void linkFile(const char *src, const char *dst,
 		 const char *schemaName, const char *relName);
 void rewriteVisibilityMap(const char *fromfile, const char *tofile,
diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c
index dd3c8cefe4..7c4c9046d3 100644
--- a/src/bin/pg_upgrade/relfilenode.c
+++ b/src/bin/pg_upgrade/relfilenode.c
@@ -14,11 +14,13 @@
 #include <sys/stat.h>
 #include "catalog/pg_class_d.h"
 #include "access/transam.h"
+#include "storage/encryption.h"
 #include "storage/freespace.h"
 
 
 static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace);
-static void transfer_relfile(FileNameMap *map, const char *suffix, bool vm_must_add_frozenbit);
+static void transfer_relfile(FileNameMap *map, const char *suffix, ForkNumber forknum,
+				 bool vm_must_add_frozenbit);
 static bool new_cluster_needs_fsm(FileNameMap *map);
 
 
@@ -168,7 +170,8 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
 			strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0)
 		{
 			/* transfer primary file */
-			transfer_relfile(&maps[mapnum], "", vm_must_add_frozenbit);
+			transfer_relfile(&maps[mapnum], "", MAIN_FORKNUM,
+							 vm_must_add_frozenbit);
 
 			/* fsm/vm files added in PG 8.4 */
 			if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
@@ -177,9 +180,10 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
 				 * Copy/link any fsm and vm files, if they exist
 				 */
 				if (new_cluster_needs_fsm(&maps[mapnum]))
-					transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit);
+					transfer_relfile(&maps[mapnum], "_fsm", FSM_FORKNUM, vm_must_add_frozenbit);
 				if (vm_crashsafe_match)
-					transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit);
+					transfer_relfile(&maps[mapnum], "_vm", VISIBILITYMAP_FORKNUM,
+									 vm_must_add_frozenbit);
 			}
 		}
 	}
@@ -194,7 +198,8 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
  * mode.
  */
 static void
-transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_frozenbit)
+transfer_relfile(FileNameMap *map, const char *type_suffix, ForkNumber forknum,
+				 bool vm_must_add_frozenbit)
 {
 	char		old_file[MAXPGPATH];
 	char		new_file[MAXPGPATH];
@@ -256,6 +261,14 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
 
 		if (vm_must_add_frozenbit && strcmp(type_suffix, "_vm") == 0)
 		{
+			/*
+			 * Encryption is not supported for the old versions that need to
+			 * rewrite the visibility map. Something bad must have happened if
+			 * this condition is met.
+			 */
+			if (encryption_enabled)
+				pg_fatal("the old cluster is too old to be encrypted\n");
+
 			/* Need to rewrite visibility map format */
 			pg_log(PG_VERBOSE, "rewriting \"%s\" to \"%s\"\n",
 				   old_file, new_file);
@@ -270,9 +283,24 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
 					cloneFile(old_file, new_file, map->nspname, map->relname);
 					break;
 				case TRANSFER_MODE_COPY:
-					pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n",
-						   old_file, new_file);
-					copyFile(old_file, new_file, map->nspname, map->relname);
+					{
+						RelFileNode old_relnode,
+									new_relnode;
+
+						pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n",
+							   old_file, new_file);
+
+						old_relnode.spcNode = map->old_tablespace_oid;
+						old_relnode.dbNode = map->old_db_oid;
+						old_relnode.relNode = map->old_relfilenode;
+						new_relnode.spcNode = map->new_tablespace_oid;
+						new_relnode.dbNode = map->new_db_oid;
+						new_relnode.relNode = map->new_relfilenode;
+
+						copyFile(old_file, &old_relnode,
+								 new_file, &new_relnode, forknum,
+								 segno, map->nspname, map->relname);
+					}
 					break;
 				case TRANSFER_MODE_LINK:
 					pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"\n",
diff --git a/src/bin/pg_waldump/Makefile b/src/bin/pg_waldump/Makefile
index bfe9498b80..e673d6fdd6 100644
--- a/src/bin/pg_waldump/Makefile
+++ b/src/bin/pg_waldump/Makefile
@@ -7,7 +7,7 @@ subdir = src/bin/pg_waldump
 top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = pg_waldump.o compat.o xlogreader.o rmgrdesc.o \
+OBJS = pg_waldump.o compat.o xlogreader.o rmgrdesc.o encryption.o \
 	$(RMGRDESCOBJS) $(WIN32RES)
 
 override CPPFLAGS := -DFRONTEND $(CPPFLAGS)
@@ -28,6 +28,9 @@ xlogreader.c: % : $(top_srcdir)/src/backend/access/transam/%
 $(RMGRDESCSOURCES): % : $(top_srcdir)/src/backend/access/rmgrdesc/%
 	rm -f $@ && $(LN_S) $< .
 
+encryption.c: % : $(top_srcdir)/src/backend/storage/file/%
+	rm -f $@ && $(LN_S) $< .
+
 install: all installdirs
 	$(INSTALL_PROGRAM) pg_waldump$(X) '$(DESTDIR)$(bindir)/pg_waldump$(X)'
 
@@ -38,4 +41,5 @@ uninstall:
 	rm -f '$(DESTDIR)$(bindir)/pg_waldump$(X)'
 
 clean distclean maintainer-clean:
-	rm -f pg_waldump$(X) $(OBJS) $(RMGRDESCSOURCES) xlogreader.c
+	rm -f pg_waldump$(X) $(OBJS) $(RMGRDESCSOURCES) xlogreader.c \
+encryption.c
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index d37e9f0817..792ac9fed3 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -22,6 +22,7 @@
 #include "access/transam.h"
 #include "common/fe_memutils.h"
 #include "fe_utils/logging.h"
+#include "storage/encryption.h"
 #include "getopt_long.h"
 #include "rmgrdesc.h"
 
@@ -194,27 +195,59 @@ search_directory(const char *directory, const char *fname)
 		PGAlignedXLogBlock buf;
 		int			r;
 
-		r = read(fd, buf.data, XLOG_BLCKSZ);
-		if (r == XLOG_BLCKSZ)
+		if (data_encrypted)
 		{
-			XLogLongPageHeader longhdr = (XLogLongPageHeader) buf.data;
-
-			WalSegSz = longhdr->xlp_seg_size;
+			/*
+			 * Segment size affects calculation of segNo and thus also the
+			 * encryption tweak, so we cannot get the size from the header
+			 * until the page is decrypted. Here we need to take more
+			 * expensive approach and really check the file size.
+			 */
+			WalSegSz = (int) lseek(fd, 0, SEEK_END);
+			if (WalSegSz <= 0)
+				fatal_error("Could not determine size of WAL segment \"%s\"", fname);
 
+			/*
+			 * Verification of the file size is the only useful thing we can
+			 * do. If anything else is wrong, the XLOG reader should find out
+			 * after decryption.
+			 */
 			if (!IsValidWalSegSize(WalSegSz))
-				fatal_error(ngettext("WAL segment size must be a power of two between 1 MB and 1 GB, but the WAL file \"%s\" header specifies %d byte",
-									 "WAL segment size must be a power of two between 1 MB and 1 GB, but the WAL file \"%s\" header specifies %d bytes",
+				fatal_error(ngettext("WAL segment size must be a power of two between 1 MB and 1 GB, but size of the WAL file \"%s\" is %d byte",
+									 "WAL segment size must be a power of two between 1 MB and 1 GB, but size of the WAL file \"%s\" is %d bytes",
 									 WalSegSz),
 							fname, WalSegSz);
 		}
 		else
 		{
-			if (errno != 0)
-				fatal_error("could not read file \"%s\": %s",
-							fname, strerror(errno));
+			r = read(fd, buf.data, XLOG_BLCKSZ);
+			if (r == XLOG_BLCKSZ)
+			{
+				XLogLongPageHeader longhdr;
+
+				longhdr = (XLogLongPageHeader) buf.data;
+
+				if (longhdr->std.xlp_magic != XLOG_PAGE_MAGIC)
+					fatal_error(gettext("WAL segment \"%s\" has an incorrect magic number. If it's encrypted, use the -K option to pass encryption credentials"),
+								fname);
+
+				WalSegSz = longhdr->xlp_seg_size;
+
+				if (!IsValidWalSegSize(WalSegSz))
+					fatal_error(ngettext("WAL segment size must be a power of two between 1 MB and 1 GB, but the WAL file \"%s\" header specifies %d byte",
+										 "WAL segment size must be a power of two between 1 MB and 1 GB, but the WAL file \"%s\" header specifies %d bytes",
+										 WalSegSz),
+								fname, WalSegSz);
+			}
 			else
-				fatal_error("could not read file \"%s\": read %d of %zu",
-							fname, r, (Size) XLOG_BLCKSZ);
+			{
+				if (errno != 0)
+					fatal_error("could not read file \"%s\": %s",
+								fname, strerror(errno));
+				else
+					fatal_error("could not read file \"%s\": read %d of %zu",
+								fname, r, (Size) XLOG_BLCKSZ);
+			}
 		}
 		close(fd);
 		return true;
@@ -444,6 +477,19 @@ XLogDumpReadPage(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen,
 	XLogDumpXLogRead(private->inpath, private->timeline, targetPagePtr,
 					 readBuff, count);
 
+	if (data_encrypted)
+	{
+		char		tweak[TWEAK_SIZE];
+		XLogSegNo	readSegNo;
+		uint32		readSegOff;
+
+		XLByteToSeg(targetPagePtr, readSegNo, WalSegSz);
+		readSegOff = targetPagePtr % WalSegSz;
+
+		XLogEncryptionTweak(tweak, private->timeline, readSegNo, readSegOff);
+		decrypt_block(readBuff, readBuff, count, tweak, true);
+	}
+
 	return count;
 }
 
@@ -791,7 +837,15 @@ usage(void)
 	printf(_("\nOptions:\n"));
 	printf(_("  -b, --bkp-details      output detailed information about backup blocks\n"));
 	printf(_("  -e, --end=RECPTR       stop reading at WAL location RECPTR\n"));
+#ifdef	USE_ENCRYPTION
+	/* Data directory may be needed to locate KDF parameters file. */
+	printf(_("  -D DATADIR             data directory\n"));
+#endif							/* USE_ENCRYPTION */
 	printf(_("  -f, --follow           keep retrying after reaching end of WAL\n"));
+#ifdef	USE_ENCRYPTION
+	printf(_("  -K, --encryption-key-command=COMMAND\n"
+			 "                         command that returns encryption key\n"));
+#endif							/* USE_ENCRYPTION */
 	printf(_("  -n, --limit=N          number of records to display\n"));
 	printf(_("  -p, --path=PATH        directory in which to find log segment files or a\n"
 			 "                         directory with a ./pg_wal that contains such files\n"
@@ -816,6 +870,7 @@ main(int argc, char **argv)
 	XLogReaderState *xlogreader_state;
 	XLogDumpPrivate private;
 	XLogDumpConfig config;
+	char	   *DataDir = NULL;
 	XLogDumpStats stats;
 	XLogRecord *record;
 	XLogRecPtr	first_record;
@@ -826,6 +881,7 @@ main(int argc, char **argv)
 		{"end", required_argument, NULL, 'e'},
 		{"follow", no_argument, NULL, 'f'},
 		{"help", no_argument, NULL, '?'},
+		{"encryption-key-command", required_argument, NULL, 'K'},
 		{"limit", required_argument, NULL, 'n'},
 		{"path", required_argument, NULL, 'p'},
 		{"rmgr", required_argument, NULL, 'r'},
@@ -869,7 +925,7 @@ main(int argc, char **argv)
 		goto bad_argument;
 	}
 
-	while ((option = getopt_long(argc, argv, "be:?fn:p:r:s:t:Vx:z",
+	while ((option = getopt_long(argc, argv, "bD:e:?fK:n:p:r:s:t:Vx:z",
 								 long_options, &optindex)) != -1)
 	{
 		switch (option)
@@ -877,6 +933,9 @@ main(int argc, char **argv)
 			case 'b':
 				config.bkp_details = true;
 				break;
+			case 'D':
+				DataDir = optarg;
+				break;
 			case 'e':
 				if (sscanf(optarg, "%X/%X", &xlogid, &xrecoff) != 2)
 				{
@@ -893,6 +952,12 @@ main(int argc, char **argv)
 				usage();
 				exit(EXIT_SUCCESS);
 				break;
+#ifdef	USE_ENCRYPTION
+			case 'K':
+				encryption_key_command = pg_strdup(optarg);
+				data_encrypted = true;
+				break;
+#endif							/* USE_ENCRYPTION */
 			case 'n':
 				if (sscanf(optarg, "%d", &config.stop_after_records) != 1)
 				{
@@ -1092,6 +1157,9 @@ main(int argc, char **argv)
 
 	/* done with argument parsing, do the actual work */
 
+	if (data_encrypted)
+		setup_encryption(false, DataDir);
+
 	/* we have everything we need, start reading */
 	xlogreader_state = XLogReaderAllocate(WalSegSz, XLogDumpReadPage,
 										  &private);
diff --git a/src/common/string.c b/src/common/string.c
index b01a56ceaa..5ba890d37d 100644
--- a/src/common/string.c
+++ b/src/common/string.c
@@ -42,6 +42,45 @@ pg_str_endswith(const char *str, const char *end)
 	return strcmp(str, end) == 0;
 }
 
+/*
+ * Return true if the 'size' bytes at 'input' (of any alignment) are all zero.
+ * Used as a helper to check if a page is completely empty.
+ * TODO Invent name that is more consistent with that of the other function(s)
+ * in this module.
+ */
+bool
+IsAllZero(const char *input, Size size)
+{
+	const char *pos = input;
+	const char *aligned_start = (const char *) MAXALIGN64(input);
+	const char *end = input + size;
+
+	/* Check 1 byte at a time until pos is 8 byte aligned or the input ends */
+	while (pos < aligned_start && pos < end)
+		if (*pos++ != 0)
+			return false;
+
+	/*
+	 * Run 8 parallel 8 byte checks in one iteration. On 2016 hardware
+	 * slightly faster than 4 parallel checks. (pos <= end holds here.)
+	 */
+	while ((Size) (end - pos) >= 8 * sizeof(uint64))
+	{
+		const uint64 *p = (const uint64 *) pos;
+
+		if ((p[0] | p[1] | p[2] | p[3] | p[4] | p[5] | p[6] | p[7]) != 0)
+			return false;
+		pos += 8 * sizeof(uint64);
+	}
+
+	/* Handle unaligned tail. */
+	while (pos < end)
+		if (*pos++ != 0)
+			return false;
+
+	return true;
+}
+
 
 /*
  * strtoint --- just like strtol, but returns int not long
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 1ead0256ad..35a6264d19 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -19,6 +19,7 @@
 #include "lib/stringinfo.h"
 #include "nodes/pg_list.h"
 #include "storage/fd.h"
+#include "storage/encryption.h"
 
 
 /* Sync methods */
@@ -186,10 +187,19 @@ extern PGDLLIMPORT int wal_level;
  * Normally, we don't WAL-log hint bit updates, but if checksums are enabled,
  * we have to protect them against torn page writes.  When you only set
  * individual bits on a page, it's still consistent no matter what combination
- * of the bits make it to disk, but the checksum wouldn't match.  Also WAL-log
- * them if forced by wal_log_hints=on.
+ * of the bits make it to disk, but the checksum wouldn't match.
+ *
+ * Regardless of checksums, if encryption is enabled, a hint bit change
+ * followed by a torn page write can result in a situation where decryption
+ * produces a page whose contents (following the hint bit) are garbage. This is
+ * because the block cipher we use propagates changes in the lower addresses of
+ * plain data to higher addresses of the cipher data. We also need a full-page
+ * image to recover from this state.
+ *
+ * Also WAL-log the hint bits if forced by wal_log_hints=on.
  */
-#define XLogHintBitIsNeeded() (DataChecksumsEnabled() || wal_log_hints)
+#define XLogHintBitIsNeeded() (DataChecksumsEnabled() || data_encrypted || \
+							   wal_log_hints)
 
 /* Do we need to WAL-log information required only for Hot Standby and logical replication? */
 #define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_REPLICA)
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index ff98d9e91a..927aa2ff61 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -94,6 +94,11 @@ typedef enum DBState
 } DBState;
 
 /*
+ * Number of bytes reserved to store encryption sample in ControlFileData.
+ */
+#define ENCRYPTION_SAMPLE_SIZE 16
+
+/*
  * Contents of pg_control.
  */
 
@@ -228,6 +233,16 @@ typedef struct ControlFileData
 	 */
 	char		mock_authentication_nonce[MOCK_AUTH_NONCE_LEN];
 
+	/*
+	 * Cipher used to encrypt data. Zero if unencrypted.
+	 *
+	 * The data type is actually CipherKind, but we don't want to include
+	 * encryption.h just because of this field.
+	 */
+	uint8		data_cipher;
+	/* Sample value for encryption key verification */
+	uint8		encryption_verification[ENCRYPTION_SAMPLE_SIZE];
+
 	/* CRC of all above ... MUST BE LAST! */
 	pg_crc32c	crc;
 } ControlFileData;
diff --git a/src/include/common/string.h b/src/include/common/string.h
index f3467f0ee4..bcdeffba2b 100644
--- a/src/include/common/string.h
+++ b/src/include/common/string.h
@@ -14,5 +14,6 @@ extern bool pg_str_endswith(const char *str, const char *end);
 extern int strtoint(const char *pg_restrict str, char **pg_restrict endptr,
 		 int base);
 extern void pg_clean_ascii(char *str);
+extern bool IsAllZero(const char *input, Size size);
 
 #endif							/* COMMON_STRING_H */
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 61283806b6..d01f2c960c 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -431,6 +431,7 @@ extern void BaseInit(void);
 /* in utils/init/miscinit.c */
 extern bool IgnoreSystemIndexes;
 extern PGDLLIMPORT bool process_shared_preload_libraries_in_progress;
+extern char *encryption_library_string;
 extern char *session_preload_libraries_string;
 extern char *shared_preload_libraries_string;
 extern char *local_preload_libraries_string;
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index 743401cb96..b716fb4d13 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -172,6 +172,13 @@
 #endif
 
 /*
+ * OpenSSL is currently the only implementation of encryption we use.
+ */
+#ifdef USE_OPENSSL
+#define USE_ENCRYPTION
+#endif
+
+/*
  * This is the default directory in which AF_UNIX socket files are
  * placed.  Caution: changing this risks breaking your existing client
  * applications, which are likely to continue to look in the old
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 3bedd268a9..4e3058784b 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -894,6 +894,9 @@ typedef enum
 	WAIT_EVENT_DATA_FILE_TRUNCATE,
 	WAIT_EVENT_DATA_FILE_WRITE,
 	WAIT_EVENT_DSM_FILL_ZERO_WRITE,
+	WAIT_EVENT_KDF_FILE_READ,
+	WAIT_EVENT_KDF_FILE_SYNC,
+	WAIT_EVENT_KDF_FILE_WRITE,
 	WAIT_EVENT_LOCK_FILE_ADDTODATADIR_READ,
 	WAIT_EVENT_LOCK_FILE_ADDTODATADIR_SYNC,
 	WAIT_EVENT_LOCK_FILE_ADDTODATADIR_WRITE,
diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h
index 525cc6203e..21c18bb6c3 100644
--- a/src/include/storage/copydir.h
+++ b/src/include/storage/copydir.h
@@ -13,7 +13,9 @@
 #ifndef COPYDIR_H
 #define COPYDIR_H
 
-extern void copydir(char *fromdir, char *todir, bool recurse);
-extern void copy_file(char *fromfile, char *tofile);
+#include "storage/relfilenode.h"
+
+extern void copydir(char *fromdir, char *todir, RelFileNode *fromNode, RelFileNode *toNode);
+extern void copy_file(char *fromfile, char *tofile, RelFileNode *fromNode, RelFileNode *toNode, ForkNumber fromForkNum, ForkNumber toForkNum, int segment);
 
 #endif							/* COPYDIR_H */
diff --git a/src/include/storage/encryption.h b/src/include/storage/encryption.h
new file mode 100644
index 0000000000..93c951488a
--- /dev/null
+++ b/src/include/storage/encryption.h
@@ -0,0 +1,167 @@
+/*-------------------------------------------------------------------------
+ *
+ * encryption.h
+ *	  Full database encryption support
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/storage/encryption.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef ENCRYPTION_H
+#define ENCRYPTION_H
+
+#include "access/xlogdefs.h"
+#include "storage/block.h"
+#include "storage/relfilenode.h"
+#include "port/pg_crc32c.h"
+
+extern void encryption_error(bool fatal, char *message);
+
+/*
+ * Common error message issued when particular code path cannot be executed
+ * due to absence of the OpenSSL library.
+ */
+#define ENCRYPTION_NOT_SUPPORTED_MSG \
+	encryption_error(true, "compile postgres with --with-openssl to use encryption.")
+
+/*
+ * Full database encryption key.
+ *
+ * The key of EVP_aes_256_cbc() cipher is 256 bits long.
+ */
+#define	ENCRYPTION_KEY_LENGTH	32
+
+/*
+ * Cipher used to encrypt data.
+ *
+ * Due to very specific requirements, the ciphers are not likely to change,
+ * but we should be somewhat flexible.
+ *
+ * XXX If we have more than one cipher someday, have pg_controldata report the
+ * cipher kind (in textual form) instead of merely saying "on".
+ */
+typedef enum CipherKind
+{
+	/* The cluster is not encrypted. */
+	PG_CIPHER_NONE = 0,
+
+	/*
+	 * AES (Rijndael) in CBC mode of operation as block cipher, and in CTR
+	 * mode as stream cipher. Key length is always 256 bits.
+	 */
+	PG_CIPHER_AES_BLOCK_CBC_256_STREAM_CTR_256
+}			CipherKind;
+
+/*
+ * TODO Tune these values.
+ */
+#define ENCRYPTION_PWD_MIN_LENGTH	8
+#define ENCRYPTION_PWD_MAX_LENGTH	16
+#define ENCRYPTION_KDF_NITER		1048576
+#define	ENCRYPTION_KDF_SALT_LEN		8
+
+/* Key to encrypt / decrypt data. */
+extern unsigned char encryption_key[];
+
+/*
+ * The encrypted data is a series of blocks of size
+ * ENCRYPTION_BLOCK. Currently we use the EVP_aes_256_cbc implementation. Make
+ * sure the following constants match if adopting another algorithm.
+ */
+#define ENCRYPTION_BLOCK 16
+
+#define TWEAK_SIZE 16
+
+#define KDF_PARAMS_FILE			"global/pg_keysetup"
+#define KDF_PARAMS_FILE_SIZE	512
+
+/*
+ * Key derivation function.
+ */
+typedef enum KDFKind
+{
+	KDF_OPENSSL_PKCS5_PBKDF2_HMAC_SHA = 0
+} KFDKind;
+
+typedef struct KDFParamsPBKDF2
+{
+	unsigned long int niter;
+	unsigned char salt[ENCRYPTION_KDF_SALT_LEN];
+} KDFParamsPBKDF2;
+
+/*
+ * Parameters of the key derivation function.
+ *
+ * The parameters are generated by initdb and stored into a file, which is
+ * then read during PG startup. This is similar to storing various settings in
+ * pg_control. However an existing KDF file is read only, so it does not have
+ * to be stored in shared memory.
+ */
+typedef struct KDFParamsData
+{
+	KFDKind		function;
+
+	/*
+	 * Function-specific parameters.
+	 */
+	union
+	{
+		KDFParamsPBKDF2 pbkdf2;
+	}			data;
+
+	/* CRC of all above ... MUST BE LAST! */
+	pg_crc32c	crc;
+} KDFParamsData;
+
+extern KDFParamsData *KDFParams;
+
+extern PGDLLIMPORT bool data_encrypted;
+
+extern void read_kdf_file(char *dir);
+#ifndef FRONTEND
+extern void write_kdf_file(void);
+#endif							/* FRONTEND */
+
+extern PGDLLIMPORT char *encryption_key_command;
+
+/*
+ * In some cases we need a separate copy of the data because encryption
+ * in-place (typically in the shared buffers) would make the data unusable for
+ * backends.
+ */
+extern PGAlignedBlock encrypt_buf;
+
+/*
+ * The same for XLOG. This buffer spans multiple pages, in order to reduce the
+ * number of syscalls when doing I/O.
+ *
+ * XXX Fine tune the buffer size.
+ */
+extern char *encrypt_buf_xlog;
+#define	XLOG_ENCRYPT_BUF_PAGES	8
+#define ENCRYPT_BUF_XLOG_SIZE	(XLOG_ENCRYPT_BUF_PAGES * XLOG_BLCKSZ)
+
+extern char *run_encryption_key_command(const char *cmd, bool *is_key_p, size_t *len_p);
+
+extern void setup_encryption(bool bootstrap, char *data_dir);
+extern void setup_encryption_key(char *credentials, bool is_key, size_t len);
+extern void sample_encryption(char *buf);
+extern void encrypt_block(const char *input, char *output, Size size,
+			  char *tweak, bool stream);
+extern void decrypt_block(const char *input, char *output, Size size,
+			  char *tweak, bool stream);
+extern void encryption_error(bool fatal, char *message);
+
+extern void XLogEncryptionTweak(char *tweak, TimeLineID timeline,
+					XLogSegNo segment, uint32 offset);
+extern BlockNumber ReencryptBlock(char *buffer, int blocks,
+			   RelFileNode *srcNode, RelFileNode *dstNode,
+			   ForkNumber srcForkNum, ForkNumber dstForkNum,
+			   BlockNumber blockNum);
+extern void mdtweak(char *tweak, RelFileNode *relnode, ForkNumber forknum,
+		BlockNumber blocknum);
+#endif							/* ENCRYPTION_H */
diff --git a/src/include/storage/reinit.h b/src/include/storage/reinit.h
index 36eeef4aec..0e028cac4d 100644
--- a/src/include/storage/reinit.h
+++ b/src/include/storage/reinit.h
@@ -20,7 +20,8 @@
 
 extern void ResetUnloggedRelations(int op);
 extern bool parse_filename_for_nontemp_relation(const char *name,
-									int *oidchars, ForkNumber *fork);
+									int *oidchars, ForkNumber *fork,
+									int *segment);
 
 #define UNLOGGED_RELATION_CLEANUP		0x0001
 #define UNLOGGED_RELATION_INIT			0x0002
diff --git a/src/test/modules/buffile/Makefile b/src/test/modules/buffile/Makefile
new file mode 100644
index 0000000000..d0329e082f
--- /dev/null
+++ b/src/test/modules/buffile/Makefile
@@ -0,0 +1,21 @@
+PG_CONFIG ?= pg_config
+MODULE_big = buffile_test
+OBJS = buffile.o $(WIN32RES)
+PGFILEDESC = "buffile_test"
+
+EXTENSION = buffile
+DATA = buffile--1.0.sql
+
+REGRESS = test_00 test_01 test_02 test_03 test_04 test_05 test_06 test_07 \
+test_08 test_09 test_10 test_11 test_12 test_13
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/buffile
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/src/test/modules/buffile/README b/src/test/modules/buffile/README
new file mode 100644
index 0000000000..9c80787797
--- /dev/null
+++ b/src/test/modules/buffile/README
@@ -0,0 +1,11 @@
+This extension was written to check that the changes introduced for cluster
+encryption do not break buffile.c.
+
+Caution: To make the test cheaper, it was decided to adjust the segment size,
+see
+
+#define MAX_PHYSICAL_FILESIZE	(4 * BLCKSZ)
+
+in buffile.c. BLCKSZ is 8192 (the default). All the tests rely on this
+value. So if you haven't compiled Postgres with this value, the tests will
+create 1 GB files and they will fail.
diff --git a/src/test/modules/buffile/buffile--1.0.sql b/src/test/modules/buffile/buffile--1.0.sql
new file mode 100644
index 0000000000..7c168f3514
--- /dev/null
+++ b/src/test/modules/buffile/buffile--1.0.sql
@@ -0,0 +1,54 @@
+CREATE FUNCTION buffile_create()
+RETURNS void
+AS 'MODULE_PATHNAME', 'buffile_create'
+LANGUAGE C;
+
+CREATE FUNCTION buffile_close()
+RETURNS void
+AS 'MODULE_PATHNAME', 'buffile_close'
+LANGUAGE C;
+
+CREATE FUNCTION buffile_write(text)
+RETURNS bigint
+AS 'MODULE_PATHNAME', 'buffile_write'
+LANGUAGE C STRICT; -- the C code never checks for a NULL argument
+
+CREATE FUNCTION buffile_read(bigint)
+RETURNS bytea
+AS 'MODULE_PATHNAME', 'buffile_read'
+LANGUAGE C STRICT; -- the C code never checks for a NULL argument
+
+CREATE FUNCTION buffile_seek(int, bigint)
+RETURNS int
+AS 'MODULE_PATHNAME', 'buffile_seek'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION buffile_assert_fileno(int)
+RETURNS void
+AS 'MODULE_PATHNAME', 'buffile_assert_fileno'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION buffile_test_shared()
+RETURNS void
+AS 'MODULE_PATHNAME', 'buffile_test_shared'
+LANGUAGE C;
+
+CREATE FUNCTION buffile_test_shared_append()
+RETURNS void
+AS 'MODULE_PATHNAME', 'buffile_test_shared_append'
+LANGUAGE C;
+
+CREATE FUNCTION buffile_open_transient(text, bool, bool)
+RETURNS void
+AS 'MODULE_PATHNAME', 'buffile_open_transient'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION buffile_close_transient()
+RETURNS void
+AS 'MODULE_PATHNAME', 'buffile_close_transient'
+LANGUAGE C;
+
+CREATE FUNCTION buffile_delete_file(text)
+RETURNS void
+AS 'MODULE_PATHNAME', 'buffile_delete_file'
+LANGUAGE C STRICT;
diff --git a/src/test/modules/buffile/buffile.c b/src/test/modules/buffile/buffile.c
new file mode 100644
index 0000000000..7fbb7939ff
--- /dev/null
+++ b/src/test/modules/buffile/buffile.c
@@ -0,0 +1,428 @@
+#include <fcntl.h>
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "lib/stringinfo.h"
+#include "storage/buffile.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/resowner.h"
+
+PG_MODULE_MAGIC;
+
+/*
+ * To cover various corner cases, the tests assume MAX_PHYSICAL_FILESIZE to be
+ * exactly MAX_PHYSICAL_FILESIZE_TEST.
+ */
+#define MAX_PHYSICAL_FILESIZE_TEST	(4 * BLCKSZ)
+
+static BufFile *bf = NULL;		/* set by buffile_create() */
+static TransientBufFile *bft = NULL;	/* set by buffile_open_transient() */
+
+static void check_file(void);
+
+extern Datum buffile_create(PG_FUNCTION_ARGS);
+/* Create the temporary file "bf"; errors out if one is already open. */
+PG_FUNCTION_INFO_V1(buffile_create);
+Datum
+buffile_create(PG_FUNCTION_ARGS)
+{
+	MemoryContext old_cxt;
+	ResourceOwner old_ro;
+
+	if (bf != NULL)
+		elog(ERROR, "file already exists");
+
+	/*
+	 * Allocate in TopMemoryContext and use the top transaction's resource
+	 * owner, so that the file is not deleted across function calls.
+	 */
+	old_cxt = MemoryContextSwitchTo(TopMemoryContext);
+	old_ro = CurrentResourceOwner;
+	CurrentResourceOwner = TopTransactionResourceOwner;
+
+	bf = BufFileCreateTemp(false);
+
+	CurrentResourceOwner = old_ro;
+	MemoryContextSwitchTo(old_cxt);
+
+	PG_RETURN_VOID();
+}
+
+extern Datum buffile_close(PG_FUNCTION_ARGS);
+/* Close the file created by buffile_create() and reset "bf". */
+PG_FUNCTION_INFO_V1(buffile_close);
+Datum
+buffile_close(PG_FUNCTION_ARGS)
+{
+	if (bf == NULL)
+		elog(ERROR, "there's no file to close");
+
+	BufFileClose(bf);
+	bf = NULL;
+
+	PG_RETURN_VOID();
+}
+
+extern Datum buffile_write(PG_FUNCTION_ARGS);
+/* Write the text argument to the open file; return the write call's result. */
+PG_FUNCTION_INFO_V1(buffile_write);
+Datum
+buffile_write(PG_FUNCTION_ARGS)
+{
+	Datum		d = PG_GETARG_DATUM(0);
+	char	   *s = TextDatumGetCString(d);
+	size_t		res;
+	/* The regular file "bf" takes precedence if both kinds are open. */
+	if (bf)
+		res = BufFileWrite(bf, s, strlen(s));
+	else if (bft)
+		res = BufFileWriteTransient(bft, s, strlen(s));
+	else
+		elog(ERROR, "No file is open");
+
+	PG_RETURN_INT64(res);
+}
+
+extern Datum buffile_read(PG_FUNCTION_ARGS);
+/* Read up to $1 bytes from the open file and return them as bytea. */
+PG_FUNCTION_INFO_V1(buffile_read);
+Datum
+buffile_read(PG_FUNCTION_ARGS)
+{
+	int64		size = PG_GETARG_INT64(0);
+	StringInfo	buf = makeStringInfo();
+	size_t		res_size;
+	bytea	   *result;
+
+	enlargeStringInfo(buf, size);	/* NOTE(review): int64 -> int, confirm */
+
+	if (bf)
+		res_size = BufFileRead(bf, buf->data, size);
+	else if (bft)
+		res_size = BufFileReadTransient(bft, buf->data, size);
+	else
+		elog(ERROR, "No file is open");
+	/* Expose only the bytes actually read to bytearecv(). */
+	buf->len = res_size;
+
+	result = DatumGetByteaPP(DirectFunctionCall1(bytearecv,
+												 PointerGetDatum(buf)));
+	PG_RETURN_BYTEA_P(result);
+}
+
+extern Datum buffile_seek(PG_FUNCTION_ARGS);
+/* SEEK_SET "bf" to (fileno, offset) and return what BufFileSeek() returned. */
+PG_FUNCTION_INFO_V1(buffile_seek);
+Datum
+buffile_seek(PG_FUNCTION_ARGS)
+{
+	int32		fileno = PG_GETARG_INT32(0);
+	int64		offset = PG_GETARG_INT64(1);
+	int32		res;
+	/* Works on "bf" only; the transient file "bft" is not handled here. */
+	check_file();
+	res = BufFileSeek(bf, fileno, offset, SEEK_SET);
+
+	PG_RETURN_INT32(res);
+}
+
+extern Datum buffile_assert_fileno(PG_FUNCTION_ARGS);
+/* Verify that the current position of "bf" is in segment file number $1. */
+PG_FUNCTION_INFO_V1(buffile_assert_fileno);
+Datum
+buffile_assert_fileno(PG_FUNCTION_ARGS)
+{
+	int32		fileno_expected = PG_GETARG_INT32(0);
+	int32		fileno;
+	off_t		offset;
+
+	check_file();
+	BufFileTell(bf, &fileno, &offset);	/* offset is fetched but not checked */
+
+	if (fileno != fileno_expected)
+	{
+		/*
+		 * Bring the backend down so that the following tests have no chance
+		 * to create the 1GB files.
+		 */
+		elog(FATAL, "file number does not match");
+	}
+
+	PG_RETURN_VOID();
+}
+
+static void
+check_file(void)
+{
+	if (bf == NULL)				/* "bf" is only set by buffile_create() */
+		elog(ERROR, "the file is not opened");
+}
+
+/*
+ * Write a shared BufFile across a segment boundary, reopen it and verify its
+ * size and contents. Especially important for shared encrypted files.
+ */
+extern Datum buffile_test_shared(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(buffile_test_shared);
+Datum
+buffile_test_shared(PG_FUNCTION_ARGS)
+{
+	dsm_segment *seg;
+	SharedFileSet *fileset;
+	BufFile    *bf_1,
+			   *bf_2;
+	char	   *data_1,
+			   *data_2,
+			   *data;
+	Size		chunk_size_1,
+				chunk_size_2;
+	int			fileno,
+				i;
+	off_t		offset,
+				res,
+				total_size;
+
+	/*
+	 * The size is not important, we actually do not need the shared memory.
+	 * The segment is only needed to initialize the fileset.
+	 */
+	seg = dsm_create(1024, 0);
+
+	/*
+	 * The fileset must survive error handling, so that dsm_detach works fine.
+	 * (The typical use case is that the fileset is in shared memory.)
+	 */
+	fileset = (SharedFileSet *) MemoryContextAlloc(TopTransactionContext,
+												   sizeof(SharedFileSet));
+	SharedFileSetInit(fileset, seg);
+
+	bf_1 = BufFileCreateShared(fileset, "file_1");
+
+	/*
+	 * Write more data than the buffer size, so that we can check that the
+	 * number of "useful bytes" word is only appended at the end of the
+	 * segment, not after each buffer.
+	 */
+	chunk_size_1 = BLCKSZ + 256;
+	data_1 = (char *) palloc(chunk_size_1);
+	memset(data_1, 1, chunk_size_1);
+	if (BufFileWrite(bf_1, data_1, chunk_size_1) != chunk_size_1)
+		elog(ERROR, "Failed to write data");
+	pfree(data_1);
+
+	/*
+	 * Enforce buffer flush (the BufFileFlush() function is not exported).
+	 * Thus the "useful bytes" metadata should appear at the current end of
+	 * the first file segment. The next write will have to seek back to
+	 * overwrite the metadata.
+	 */
+	BufFileTell(bf_1, &fileno, &offset);
+	if (BufFileSeek(bf_1, 0, 0, SEEK_SET) != 0)
+		elog(ERROR, "seek failed");
+	if (BufFileSeek(bf_1, fileno, offset, SEEK_SET) != 0)
+		elog(ERROR, "seek failed");
+
+	/*
+	 * Write another chunk that does not fit into the first segment file. Thus
+	 * the "useful bytes" metadata should appear at the end of both segments.
+	 */
+	chunk_size_2 = 3 * BLCKSZ;
+	data_2 = (char *) palloc(chunk_size_2);
+	memset(data_2, 1, chunk_size_2);
+	if (BufFileWrite(bf_1, data_2, chunk_size_2) != chunk_size_2)
+		elog(ERROR, "Failed to write data");
+	pfree(data_2);
+	BufFileClose(bf_1);
+
+	/*
+	 * The word indicating the number of "useful bytes" (i.e. the actual data
+	 * w/o padding to buffer size) is stored at the end of each segment file.
+	 * Check that this metadata is read correctly.
+	 */
+	bf_2 = BufFileOpenShared(fileset, "file_1");
+	total_size = BufFileSize(bf_2);
+	if (total_size != (chunk_size_1 + chunk_size_2))
+		elog(ERROR, "Incorrect file size: " INT64_FORMAT, (int64) total_size);
+
+	data = (char *) palloc(total_size);
+	res = BufFileRead(bf_2, data, total_size);
+	if (res != total_size)
+		elog(ERROR, "Incorrect chunk size read: " INT64_FORMAT, (int64) res);
+	for (i = 0; i < total_size; i++)
+		if (data[i] != 1)
+			elog(ERROR, "Unexpected data read from the file");
+	pfree(data);
+	BufFileClose(bf_2);
+
+	dsm_detach(seg);
+
+	PG_RETURN_VOID();
+}
+
+
+/*
+ * Test BufFileAppend(): append two reopened shared files to a writable one.
+ */
+extern Datum buffile_test_shared_append(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(buffile_test_shared_append);
+Datum
+buffile_test_shared_append(PG_FUNCTION_ARGS)
+{
+	dsm_segment *seg;
+	SharedFileSet *fileset;
+	BufFile    *bf_1,
+			   *bf_2,
+			   *bf_3;
+	char	   *data;
+	Size		chunk_size;
+	int			i;
+	off_t		res,
+				total_size;
+
+	seg = dsm_create(1024, 0);
+
+	fileset = (SharedFileSet *) MemoryContextAlloc(TopTransactionContext,
+												   sizeof(SharedFileSet));
+	SharedFileSetInit(fileset, seg);
+
+	/*
+	 * XXX Does the chunk size matter much?
+	 */
+	chunk_size = 8;
+	data = (char *) palloc(chunk_size);
+	memset(data, 1, chunk_size);
+
+	bf_1 = BufFileCreateShared(fileset, "file_1");
+	if (BufFileWrite(bf_1, data, chunk_size) != chunk_size)
+		elog(ERROR, "Failed to write data");
+
+	bf_2 = BufFileCreateShared(fileset, "file_2");
+	if (BufFileWrite(bf_2, data, chunk_size) != chunk_size)
+		elog(ERROR, "Failed to write data");
+
+	/*
+	 * Make sure it's read-only so that BufFileAppend() can accept it as
+	 * source.
+	 */
+	BufFileClose(bf_2);
+	bf_2 = BufFileOpenShared(fileset, "file_2");
+
+	bf_3 = BufFileCreateShared(fileset, "file_3");
+	if (BufFileWrite(bf_3, data, chunk_size) != chunk_size)
+		elog(ERROR, "Failed to write data");
+	BufFileClose(bf_3);
+	bf_3 = BufFileOpenShared(fileset, "file_3");
+
+	BufFileAppend(bf_1, bf_2);
+	BufFileAppend(bf_1, bf_3);
+
+	total_size = BufFileSize(bf_1);
+
+	/*
+	 * The result should contain complete segments of bf_1 and bf_2 and the
+	 * valid part of bf_3.
+	 */
+	if (total_size != (2 * MAX_PHYSICAL_FILESIZE_TEST + chunk_size))
+		elog(ERROR, "Incorrect total size of the appended data: " INT64_FORMAT,
+			 (int64) total_size);
+
+	/*
+	 * Check that data of the 2nd segment was decrypted correctly.
+	 */
+	if (BufFileSeek(bf_1, 1, 0, SEEK_SET) != 0)
+		elog(ERROR, "seek failed");
+	res = BufFileRead(bf_1, data, chunk_size);
+	if (res != chunk_size)
+		elog(ERROR, "Incorrect chunk size read: " INT64_FORMAT, (int64) res);
+	for (i = 0; i < chunk_size; i++)
+		if (data[i] != 1)
+			elog(ERROR, "Unexpected data read from the file");
+
+	/*
+	 * And the same for the 3rd segment.
+	 *
+	 * TODO Reuse the code above by putting it into a function.
+	 */
+	if (BufFileSeek(bf_1, 2, 0, SEEK_SET) != 0)
+		elog(ERROR, "seek failed");
+	res = BufFileRead(bf_1, data, chunk_size);
+	if (res != chunk_size)
+		elog(ERROR, "Incorrect chunk size read: " INT64_FORMAT, (int64) res);
+	for (i = 0; i < chunk_size; i++)
+		if (data[i] != 1)
+			elog(ERROR, "Unexpected data read from the file");
+
+	BufFileClose(bf_1);
+	dsm_detach(seg);
+	PG_RETURN_VOID();
+}
+
+extern Datum buffile_open_transient(PG_FUNCTION_ARGS);
+/* Open "path" as the transient file "bft"; args: path, write_only, append. */
+PG_FUNCTION_INFO_V1(buffile_open_transient);
+Datum
+buffile_open_transient(PG_FUNCTION_ARGS)
+{
+	MemoryContext old_cxt;
+	Datum		d = PG_GETARG_DATUM(0);
+	char	   *path = TextDatumGetCString(d);
+	bool		write_only = PG_GETARG_BOOL(1);
+	bool		append = PG_GETARG_BOOL(2);
+	int			flags = O_CREAT | PG_BINARY;
+
+	if (bft != NULL)
+		elog(ERROR, "file already exists");
+	/* With write_only unset, no access-mode flag is added to "flags". */
+	if (write_only)
+		flags |= O_WRONLY;
+	if (append)
+		flags |= O_APPEND;
+
+	old_cxt = MemoryContextSwitchTo(TopMemoryContext);
+
+	/*
+	 * Make sure the file is not deleted across function calls.
+	 */
+	bft = BufFileOpenTransient(path, flags);
+
+	MemoryContextSwitchTo(old_cxt);
+
+	PG_RETURN_VOID();
+}
+
+extern Datum buffile_close_transient(PG_FUNCTION_ARGS);
+/* Close the file opened by buffile_open_transient() and reset "bft". */
+PG_FUNCTION_INFO_V1(buffile_close_transient);
+Datum
+buffile_close_transient(PG_FUNCTION_ARGS)
+{
+	if (bft == NULL)
+		elog(ERROR, "there's no file to close");
+
+	BufFileCloseTransient(bft);
+	bft = NULL;
+
+	PG_RETURN_VOID();
+}
+
+extern Datum buffile_delete_file(PG_FUNCTION_ARGS);
+/* Delete the file at "path"; refuses while a transient file is still open. */
+PG_FUNCTION_INFO_V1(buffile_delete_file);
+Datum
+buffile_delete_file(PG_FUNCTION_ARGS)
+{
+	Datum		d = PG_GETARG_DATUM(0);
+	char	   *path = TextDatumGetCString(d);
+
+	if (bft != NULL)
+		elog(ERROR, "the file is still open");
+
+	PathNameDeleteTemporaryFile(path, true);
+
+	PG_RETURN_VOID();
+}
diff --git a/src/test/modules/buffile/buffile.control b/src/test/modules/buffile/buffile.control
new file mode 100644
index 0000000000..8472c5a348
--- /dev/null
+++ b/src/test/modules/buffile/buffile.control
@@ -0,0 +1,5 @@
+# buffile_test extension
+comment = 'buffile_test'
+default_version = '1.0'
+module_pathname = '$libdir/buffile_test'
+relocatable = true
diff --git a/src/test/modules/buffile/expected/test_00.out b/src/test/modules/buffile/expected/test_00.out
new file mode 100644
index 0000000000..637c33689f
--- /dev/null
+++ b/src/test/modules/buffile/expected/test_00.out
@@ -0,0 +1,47 @@
+CREATE EXTENSION IF NOT EXISTS buffile;
+-- This test only verifies that PG is compiled with a component file size of
+-- 32 kB (i.e. 4 buffers of 8 kB) instead of 1 GB. That seems appropriate for
+-- testing. Some other tests may rely on it.
+BEGIN;
+SELECT buffile_create();
+ buffile_create 
+----------------
+ 
+(1 row)
+
+-- Skip the first component file.
+SELECT buffile_seek(0, 32768);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+-- Write the first byte of the second component file. We can't simply
+-- buffile_seek() beyond the position 32768 as it would return EOF.
+SELECT buffile_write('a');
+ buffile_write 
+---------------
+             1
+(1 row)
+
+-- Enforce BufFileFlush(), which actually adds the component file.
+SELECT buffile_read(1);
+ buffile_read 
+--------------
+ \x
+(1 row)
+
+-- Check that we're in the 2nd file, i.e. the file size is as expected.
+SELECT buffile_assert_fileno(1);
+ buffile_assert_fileno 
+-----------------------
+ 
+(1 row)
+
+SELECT buffile_close();
+ buffile_close 
+---------------
+ 
+(1 row)
+
+COMMIT;
diff --git a/src/test/modules/buffile/expected/test_01.out b/src/test/modules/buffile/expected/test_01.out
new file mode 100644
index 0000000000..448bac171c
--- /dev/null
+++ b/src/test/modules/buffile/expected/test_01.out
@@ -0,0 +1,59 @@
+BEGIN;
+SELECT buffile_create();
+ buffile_create 
+----------------
+ 
+(1 row)
+
+SELECT buffile_seek(0, 1);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_write('abc');
+ buffile_write 
+---------------
+             3
+(1 row)
+
+SELECT buffile_seek(0, 0);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+-- Check that the trailing zeroes are not fetched.
+SELECT buffile_read(16);
+ buffile_read 
+--------------
+ \x00616263
+(1 row)
+
+-- Adjust the number of useful bytes.
+SELECT buffile_write('abc');
+ buffile_write 
+---------------
+             3
+(1 row)
+
+-- ... and check again.
+SELECT buffile_seek(0, 0);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_read(16);
+   buffile_read   
+------------------
+ \x00616263616263
+(1 row)
+
+SELECT buffile_close();
+ buffile_close 
+---------------
+ 
+(1 row)
+
+COMMIT;
diff --git a/src/test/modules/buffile/expected/test_02.out b/src/test/modules/buffile/expected/test_02.out
new file mode 100644
index 0000000000..f783d0cb24
--- /dev/null
+++ b/src/test/modules/buffile/expected/test_02.out
@@ -0,0 +1,48 @@
+BEGIN;
+SELECT buffile_create();
+ buffile_create 
+----------------
+ 
+(1 row)
+
+SELECT buffile_seek(0, 8189);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+-- Initialize the last 3 positions of the first buffer and the initial 3
+-- positions of the 2nd buffer.
+SELECT buffile_write('abcdef');
+ buffile_write 
+---------------
+             6
+(1 row)
+
+SELECT buffile_seek(0, 0);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+-- Read the first buffer.
+SELECT length(buffile_read(8192));
+ length 
+--------
+   8192
+(1 row)
+
+-- Only 3 bytes of the 2nd buffer should be fetched.
+SELECT length(buffile_read(8192));
+ length 
+--------
+      3
+(1 row)
+
+SELECT buffile_close();
+ buffile_close 
+---------------
+ 
+(1 row)
+
+COMMIT;
diff --git a/src/test/modules/buffile/expected/test_03.out b/src/test/modules/buffile/expected/test_03.out
new file mode 100644
index 0000000000..e899fa3b38
--- /dev/null
+++ b/src/test/modules/buffile/expected/test_03.out
@@ -0,0 +1,27 @@
+BEGIN;
+SELECT buffile_create();
+ buffile_create 
+----------------
+ 
+(1 row)
+
+-- Read from an empty file.
+SELECT buffile_seek(0, 8);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_read(16);
+ buffile_read 
+--------------
+ \x
+(1 row)
+
+SELECT buffile_close();
+ buffile_close 
+---------------
+ 
+(1 row)
+
+COMMIT;
diff --git a/src/test/modules/buffile/expected/test_04.out b/src/test/modules/buffile/expected/test_04.out
new file mode 100644
index 0000000000..1f8eeabe48
--- /dev/null
+++ b/src/test/modules/buffile/expected/test_04.out
@@ -0,0 +1,84 @@
+BEGIN;
+SELECT buffile_create();
+ buffile_create 
+----------------
+ 
+(1 row)
+
+-- Write something near the end of the first buffer, but leave some trailing
+-- space.
+SELECT buffile_seek(0, 8184);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_write('abcd');
+ buffile_write 
+---------------
+             4
+(1 row)
+
+-- Leave the 2nd buffer empty, as well as a few leading bytes. Thus we should
+-- get a hole that spans the whole 2nd buffer as well as a few adjacent bytes
+-- on each side.
+SELECT buffile_seek(0, 2 * 8192 + 4);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_write('efgh');
+ buffile_write 
+---------------
+             4
+(1 row)
+
+-- Check the initial part of the hole, which crosses the boundary of the 1st
+-- and the 2nd buffer.
+SELECT buffile_seek(0, 8184);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_read(16);
+            buffile_read            
+------------------------------------
+ \x61626364000000000000000000000000
+(1 row)
+
+-- Check the trailing part of the whole, which crosses the boundary of the 2nd
+-- and the 3rd buffer.
+SELECT buffile_seek(0, 2 * 8192 - 8);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_read(16);
+            buffile_read            
+------------------------------------
+ \x00000000000000000000000065666768
+(1 row)
+
+-- Check that the hole contains nothing but zeroes.
+SELECT buffile_seek(0, 8192 - 4);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT btrim(buffile_read(8192 + 8), '\x00');
+ btrim 
+-------
+ \x
+(1 row)
+
+SELECT buffile_close();
+ buffile_close 
+---------------
+ 
+(1 row)
+
+COMMIT;
diff --git a/src/test/modules/buffile/expected/test_05.out b/src/test/modules/buffile/expected/test_05.out
new file mode 100644
index 0000000000..6a73147711
--- /dev/null
+++ b/src/test/modules/buffile/expected/test_05.out
@@ -0,0 +1,33 @@
+BEGIN;
+SELECT buffile_create();
+ buffile_create 
+----------------
+ 
+(1 row)
+
+-- Seek does not extend the file if it's not followed by write.
+SELECT buffile_seek(0, 1);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_seek(0, 0);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_read(2);
+ buffile_read 
+--------------
+ \x
+(1 row)
+
+SELECT buffile_close();
+ buffile_close 
+---------------
+ 
+(1 row)
+
+COMMIT;
diff --git a/src/test/modules/buffile/expected/test_06.out b/src/test/modules/buffile/expected/test_06.out
new file mode 100644
index 0000000000..def9af5112
--- /dev/null
+++ b/src/test/modules/buffile/expected/test_06.out
@@ -0,0 +1,41 @@
+-- This test shows that the first component file (segment) stays empty, read
+-- stops prematurely even if it starts on that segment, even though it'd
+-- otherwise receive some data from the following one.
+BEGIN;
+SELECT buffile_create();
+ buffile_create 
+----------------
+ 
+(1 row)
+
+SELECT buffile_seek(0, 32768);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_write('a');
+ buffile_write 
+---------------
+             1
+(1 row)
+
+SELECT buffile_seek(0, 32767);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_read(2);
+ buffile_read 
+--------------
+ \x
+(1 row)
+
+SELECT buffile_close();
+ buffile_close 
+---------------
+ 
+(1 row)
+
+COMMIT;
diff --git a/src/test/modules/buffile/expected/test_07.out b/src/test/modules/buffile/expected/test_07.out
new file mode 100644
index 0000000000..215280f0b0
--- /dev/null
+++ b/src/test/modules/buffile/expected/test_07.out
@@ -0,0 +1,39 @@
+BEGIN;
+SELECT buffile_create();
+ buffile_create 
+----------------
+ 
+(1 row)
+
+-- Write data at component file boundary and try to read it.
+SELECT buffile_seek(0, 32768);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_write('abcd');
+ buffile_write 
+---------------
+             4
+(1 row)
+
+SELECT buffile_seek(0, 32768);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_read(8);
+ buffile_read 
+--------------
+ \x61626364
+(1 row)
+
+SELECT buffile_close();
+ buffile_close 
+---------------
+ 
+(1 row)
+
+COMMIT;
diff --git a/src/test/modules/buffile/expected/test_08.out b/src/test/modules/buffile/expected/test_08.out
new file mode 100644
index 0000000000..7c1069930c
--- /dev/null
+++ b/src/test/modules/buffile/expected/test_08.out
@@ -0,0 +1,39 @@
+BEGIN;
+SELECT buffile_create();
+ buffile_create 
+----------------
+ 
+(1 row)
+
+-- Write data across component file boundary and try to read it.
+SELECT buffile_seek(0, 32766);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_write('abcd');
+ buffile_write 
+---------------
+             4
+(1 row)
+
+SELECT buffile_seek(0, 32766);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_read(8);
+ buffile_read 
+--------------
+ \x61626364
+(1 row)
+
+SELECT buffile_close();
+ buffile_close 
+---------------
+ 
+(1 row)
+
+COMMIT;
diff --git a/src/test/modules/buffile/expected/test_09.out b/src/test/modules/buffile/expected/test_09.out
new file mode 100644
index 0000000000..b9d325b676
--- /dev/null
+++ b/src/test/modules/buffile/expected/test_09.out
@@ -0,0 +1,39 @@
+BEGIN;
+SELECT buffile_create();
+ buffile_create 
+----------------
+ 
+(1 row)
+
+-- Write data across buffer boundary and try to read it.
+SELECT buffile_seek(0, 8190);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_write('abcd');
+ buffile_write 
+---------------
+             4
+(1 row)
+
+SELECT buffile_seek(0, 8190);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_read(8);
+ buffile_read 
+--------------
+ \x61626364
+(1 row)
+
+SELECT buffile_close();
+ buffile_close 
+---------------
+ 
+(1 row)
+
+COMMIT;
diff --git a/src/test/modules/buffile/expected/test_10.out b/src/test/modules/buffile/expected/test_10.out
new file mode 100644
index 0000000000..8e457fdcda
--- /dev/null
+++ b/src/test/modules/buffile/expected/test_10.out
@@ -0,0 +1,76 @@
+BEGIN;
+SELECT buffile_create();
+ buffile_create 
+----------------
+ 
+(1 row)
+
+-- Write some data at the end of the buffer.
+SELECT buffile_seek(0, 8188);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_write('abcd');
+ buffile_write 
+---------------
+             4
+(1 row)
+
+SELECT buffile_seek(0, 8189);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+-- Enforce flush with the write position not at the end of the buffer. This is
+-- special by not moving curOffset to the next buffer.
+SELECT buffile_read(1);
+ buffile_read 
+--------------
+ \x62
+(1 row)
+
+-- Therefore the next writes should eventually affect the original data. (Here
+-- we also test going directly from read to write and vice versa.)
+SELECT buffile_write('x');
+ buffile_write 
+---------------
+             1
+(1 row)
+
+SELECT buffile_read(1);
+ buffile_read 
+--------------
+ \x64
+(1 row)
+
+-- Start a new buffer, i.e. force flushing of the previous one.
+SELECT buffile_write('z');
+ buffile_write 
+---------------
+             1
+(1 row)
+
+-- Check that the 'x' and 'y' letters are in the first buffer, not in the
+-- 2nd. (We read enough data to find any non-zero bytes in the 2nd buffer.)
+SELECT buffile_seek(0, 8188);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+SELECT buffile_read(4 + 8192);
+ buffile_read 
+--------------
+ \x616278647a
+(1 row)
+
+SELECT buffile_close();
+ buffile_close 
+---------------
+ 
+(1 row)
+
+COMMIT;
diff --git a/src/test/modules/buffile/expected/test_11.out b/src/test/modules/buffile/expected/test_11.out
new file mode 100644
index 0000000000..c6804d08af
--- /dev/null
+++ b/src/test/modules/buffile/expected/test_11.out
@@ -0,0 +1,34 @@
+BEGIN;
+SELECT buffile_create();
+ buffile_create 
+----------------
+ 
+(1 row)
+
+SELECT buffile_write('abcd');
+ buffile_write 
+---------------
+             4
+(1 row)
+
+-- Seek beyond EOF not followed by write.
+SELECT buffile_seek(0, 5);
+ buffile_seek 
+--------------
+            0
+(1 row)
+
+-- Nothing should be fetched.
+SELECT buffile_read(8);
+ buffile_read 
+--------------
+ \x
+(1 row)
+
+SELECT buffile_close();
+ buffile_close 
+---------------
+ 
+(1 row)
+
+COMMIT;
diff --git a/src/test/modules/buffile/expected/test_12.out b/src/test/modules/buffile/expected/test_12.out
new file mode 100644
index 0000000000..3dce7eaf0f
--- /dev/null
+++ b/src/test/modules/buffile/expected/test_12.out
@@ -0,0 +1,12 @@
+SELECT buffile_test_shared();
+ buffile_test_shared 
+---------------------
+ 
+(1 row)
+
+SELECT buffile_test_shared_append();
+ buffile_test_shared_append 
+----------------------------
+ 
+(1 row)
+
diff --git a/src/test/modules/buffile/expected/test_13.out b/src/test/modules/buffile/expected/test_13.out
new file mode 100644
index 0000000000..7ba50a9c9d
--- /dev/null
+++ b/src/test/modules/buffile/expected/test_13.out
@@ -0,0 +1,86 @@
+-- Use transaction block so that the file does not closed automatically at
+-- command boundary.
+BEGIN;
+SELECT buffile_open_transient('trans1', true, false);
+ buffile_open_transient 
+------------------------
+ 
+(1 row)
+
+SELECT buffile_write('01234567');
+ buffile_write 
+---------------
+             8
+(1 row)
+
+SELECT buffile_close_transient();
+ buffile_close_transient 
+-------------------------
+ 
+(1 row)
+
+-- Open for reading.
+SELECT buffile_open_transient('trans1', false, false);
+ buffile_open_transient 
+------------------------
+ 
+(1 row)
+
+SELECT length(buffile_read(65536));
+ length 
+--------
+      8
+(1 row)
+
+SELECT buffile_close_transient();
+ buffile_close_transient 
+-------------------------
+ 
+(1 row)
+
+-- Open for writing in append mode.
+SELECT buffile_open_transient('trans1', true, true);
+ buffile_open_transient 
+------------------------
+ 
+(1 row)
+
+-- Add BLCKSZ bytes, so that buffer boundary is crossed.
+SELECT buffile_write(repeat('x', 8192));
+ buffile_write 
+---------------
+          8192
+(1 row)
+
+SELECT buffile_close_transient();
+ buffile_close_transient 
+-------------------------
+ 
+(1 row)
+
+-- Open for reading and verify the valid part.
+SELECT buffile_open_transient('trans1', false, false);
+ buffile_open_transient 
+------------------------
+ 
+(1 row)
+
+SELECT length(buffile_read(65536));
+ length 
+--------
+   8200
+(1 row)
+
+SELECT buffile_close_transient();
+ buffile_close_transient 
+-------------------------
+ 
+(1 row)
+
+SELECT buffile_delete_file('trans1');
+ buffile_delete_file 
+---------------------
+ 
+(1 row)
+
+COMMIT;
diff --git a/src/test/modules/buffile/sql/test_00.sql b/src/test/modules/buffile/sql/test_00.sql
new file mode 100644
index 0000000000..c907a5e195
--- /dev/null
+++ b/src/test/modules/buffile/sql/test_00.sql
@@ -0,0 +1,18 @@
+CREATE EXTENSION IF NOT EXISTS buffile;
+
+-- This test only verifies that PG is compiled with a component file size of
+-- 32 kB (i.e. 4 buffers of 8 kB) instead of 1 GB. That seems appropriate for
+-- testing. Some other tests may rely on it.
+BEGIN;
+SELECT buffile_create();
+-- Skip the first component file.
+SELECT buffile_seek(0, 32768);
+-- Write the first byte of the second component file. We can't simply
+-- buffile_seek() beyond the position 32768 as it would return EOF.
+SELECT buffile_write('a');
+-- Enforce BufFileFlush(), which actually adds the component file.
+SELECT buffile_read(1);
+-- Check that we're in the 2nd file, i.e. the file size is as expected.
+SELECT buffile_assert_fileno(1);
+SELECT buffile_close();
+COMMIT;
diff --git a/src/test/modules/buffile/sql/test_01.sql b/src/test/modules/buffile/sql/test_01.sql
new file mode 100644
index 0000000000..8cc87de841
--- /dev/null
+++ b/src/test/modules/buffile/sql/test_01.sql
@@ -0,0 +1,14 @@
+BEGIN;
+SELECT buffile_create();
+SELECT buffile_seek(0, 1);
+SELECT buffile_write('abc');
+SELECT buffile_seek(0, 0);
+-- Check that the trailing zeroes are not fetched.
+SELECT buffile_read(16);
+-- Adjust the number of useful bytes.
+SELECT buffile_write('abc');
+-- ... and check again.
+SELECT buffile_seek(0, 0);
+SELECT buffile_read(16);
+SELECT buffile_close();
+COMMIT;
diff --git a/src/test/modules/buffile/sql/test_02.sql b/src/test/modules/buffile/sql/test_02.sql
new file mode 100644
index 0000000000..35d34722d7
--- /dev/null
+++ b/src/test/modules/buffile/sql/test_02.sql
@@ -0,0 +1,13 @@
+BEGIN;
+SELECT buffile_create();
+SELECT buffile_seek(0, 8189);
+-- Initialize the last 3 positions of the first buffer and the initial 3
+-- positions of the 2nd buffer.
+SELECT buffile_write('abcdef');
+SELECT buffile_seek(0, 0);
+-- Read the first buffer.
+SELECT length(buffile_read(8192));
+-- Only 3 bytes of the 2nd buffer should be fetched.
+SELECT length(buffile_read(8192));
+SELECT buffile_close();
+COMMIT;
diff --git a/src/test/modules/buffile/sql/test_03.sql b/src/test/modules/buffile/sql/test_03.sql
new file mode 100644
index 0000000000..a95391f7c3
--- /dev/null
+++ b/src/test/modules/buffile/sql/test_03.sql
@@ -0,0 +1,7 @@
+BEGIN;
+SELECT buffile_create();
+-- Read from an empty file.
+SELECT buffile_seek(0, 8);
+SELECT buffile_read(16);
+SELECT buffile_close();
+COMMIT;
diff --git a/src/test/modules/buffile/sql/test_04.sql b/src/test/modules/buffile/sql/test_04.sql
new file mode 100644
index 0000000000..64e8d39f94
--- /dev/null
+++ b/src/test/modules/buffile/sql/test_04.sql
@@ -0,0 +1,25 @@
+BEGIN;
+SELECT buffile_create();
+-- Write something near the end of the first buffer, but leave some trailing
+-- space.
+SELECT buffile_seek(0, 8184);
+SELECT buffile_write('abcd');
+-- Leave the 2nd buffer empty, as well as a few leading bytes. Thus we should
+-- get a hole that spans the whole 2nd buffer as well as a few adjacent bytes
+-- on each side.
+SELECT buffile_seek(0, 2 * 8192 + 4);
+SELECT buffile_write('efgh');
+-- Check the initial part of the hole, which crosses the boundary of the 1st
+-- and the 2nd buffer.
+SELECT buffile_seek(0, 8184);
+SELECT buffile_read(16);
+-- Check the trailing part of the whole, which crosses the boundary of the 2nd
+-- and the 3rd buffer.
+SELECT buffile_seek(0, 2 * 8192 - 8);
+SELECT buffile_read(16);
+-- Check that the hole contains nothing but zeroes.
+SELECT buffile_seek(0, 8192 - 4);
+SELECT btrim(buffile_read(8192 + 8), '\x00');
+
+SELECT buffile_close();
+COMMIT;
diff --git a/src/test/modules/buffile/sql/test_05.sql b/src/test/modules/buffile/sql/test_05.sql
new file mode 100644
index 0000000000..5fd642e558
--- /dev/null
+++ b/src/test/modules/buffile/sql/test_05.sql
@@ -0,0 +1,8 @@
+BEGIN;
+SELECT buffile_create();
+-- Seek does not extend the file if it's not followed by write.
+SELECT buffile_seek(0, 1);
+SELECT buffile_seek(0, 0);
+SELECT buffile_read(2);
+SELECT buffile_close();
+COMMIT;
diff --git a/src/test/modules/buffile/sql/test_06.sql b/src/test/modules/buffile/sql/test_06.sql
new file mode 100644
index 0000000000..bbf506ec0e
--- /dev/null
+++ b/src/test/modules/buffile/sql/test_06.sql
@@ -0,0 +1,11 @@
+-- This test shows that the first component file (segment) stays empty, read
+-- stops prematurely even if it starts on that segment, even though it'd
+-- otherwise receive some data from the following one.
+BEGIN;
+SELECT buffile_create();
+SELECT buffile_seek(0, 32768);
+SELECT buffile_write('a');
+SELECT buffile_seek(0, 32767);
+SELECT buffile_read(2);
+SELECT buffile_close();
+COMMIT;
diff --git a/src/test/modules/buffile/sql/test_07.sql b/src/test/modules/buffile/sql/test_07.sql
new file mode 100644
index 0000000000..ca78506ecb
--- /dev/null
+++ b/src/test/modules/buffile/sql/test_07.sql
@@ -0,0 +1,9 @@
+BEGIN;
+SELECT buffile_create();
+-- Write data at component file boundary and try to read it.
+SELECT buffile_seek(0, 32768);
+SELECT buffile_write('abcd');
+SELECT buffile_seek(0, 32768);
+SELECT buffile_read(8);
+SELECT buffile_close();
+COMMIT;
diff --git a/src/test/modules/buffile/sql/test_08.sql b/src/test/modules/buffile/sql/test_08.sql
new file mode 100644
index 0000000000..0e5b1de383
--- /dev/null
+++ b/src/test/modules/buffile/sql/test_08.sql
@@ -0,0 +1,9 @@
+BEGIN;
+SELECT buffile_create();
+-- Write data across component file boundary and try to read it.
+SELECT buffile_seek(0, 32766);
+SELECT buffile_write('abcd');
+SELECT buffile_seek(0, 32766);
+SELECT buffile_read(8);
+SELECT buffile_close();
+COMMIT;
diff --git a/src/test/modules/buffile/sql/test_09.sql b/src/test/modules/buffile/sql/test_09.sql
new file mode 100644
index 0000000000..cc7060932e
--- /dev/null
+++ b/src/test/modules/buffile/sql/test_09.sql
@@ -0,0 +1,9 @@
+BEGIN;
+SELECT buffile_create();
+-- Write data across buffer boundary and try to read it.
+SELECT buffile_seek(0, 8190);
+SELECT buffile_write('abcd');
+SELECT buffile_seek(0, 8190);
+SELECT buffile_read(8);
+SELECT buffile_close();
+COMMIT;
diff --git a/src/test/modules/buffile/sql/test_10.sql b/src/test/modules/buffile/sql/test_10.sql
new file mode 100644
index 0000000000..63af760d9f
--- /dev/null
+++ b/src/test/modules/buffile/sql/test_10.sql
@@ -0,0 +1,25 @@
+BEGIN;
+SELECT buffile_create();
+-- Write some data at the end of the buffer.
+SELECT buffile_seek(0, 8188);
+SELECT buffile_write('abcd');
+SELECT buffile_seek(0, 8189);
+-- Enforce flush with the write position not at the end of the buffer. This
+-- case is special in that curOffset is not moved to the next buffer.
+SELECT buffile_read(1);
+
+-- Therefore the next writes should eventually affect the original data. (Here
+-- we also test going directly from read to write and vice versa.)
+SELECT buffile_write('x');
+SELECT buffile_read(1);
+
+-- Start a new buffer, i.e. force flushing of the previous one.
+SELECT buffile_write('z');
+
+-- Check that 'x' landed in the first buffer, not in the 2nd one started by
+-- 'z'. (We read enough data to find any non-zero bytes in the 2nd buffer.)
+SELECT buffile_seek(0, 8188);
+SELECT buffile_read(4 + 8192);
+
+SELECT buffile_close();
+COMMIT;
diff --git a/src/test/modules/buffile/sql/test_11.sql b/src/test/modules/buffile/sql/test_11.sql
new file mode 100644
index 0000000000..94300d253f
--- /dev/null
+++ b/src/test/modules/buffile/sql/test_11.sql
@@ -0,0 +1,9 @@
+BEGIN;
+SELECT buffile_create();
+SELECT buffile_write('abcd');
+-- Seek beyond EOF not followed by write.
+SELECT buffile_seek(0, 5);
+-- Nothing should be fetched.
+SELECT buffile_read(8);
+SELECT buffile_close();
+COMMIT;
diff --git a/src/test/modules/buffile/sql/test_12.sql b/src/test/modules/buffile/sql/test_12.sql
new file mode 100644
index 0000000000..a28ae395ef
--- /dev/null
+++ b/src/test/modules/buffile/sql/test_12.sql
@@ -0,0 +1,2 @@
+SELECT buffile_test_shared();
+SELECT buffile_test_shared_append();
diff --git a/src/test/modules/buffile/sql/test_13.sql b/src/test/modules/buffile/sql/test_13.sql
new file mode 100644
index 0000000000..bcabf7bfe6
--- /dev/null
+++ b/src/test/modules/buffile/sql/test_13.sql
@@ -0,0 +1,25 @@
+-- Use transaction block so that the file is not closed automatically at
+-- command boundary.
+BEGIN;
+SELECT buffile_open_transient('trans1', true, false);
+SELECT buffile_write('01234567');
+SELECT buffile_close_transient();
+
+-- Open for reading.
+SELECT buffile_open_transient('trans1', false, false);
+SELECT length(buffile_read(65536));
+SELECT buffile_close_transient();
+
+-- Open for writing in append mode.
+SELECT buffile_open_transient('trans1', true, true);
+-- Add BLCKSZ bytes, so that buffer boundary is crossed.
+SELECT buffile_write(repeat('x', 8192));
+SELECT buffile_close_transient();
+
+-- Open for reading and verify the valid part.
+SELECT buffile_open_transient('trans1', false, false);
+SELECT length(buffile_read(65536));
+SELECT buffile_close_transient();
+
+SELECT buffile_delete_file('trans1');
+COMMIT;
