Author: Noah Misch Commit: Noah Misch Fix data loss in wal_level=minimal crash recovery of CREATE TABLESPACE. If the system crashed between CREATE TABLESPACE and the next checkpoint, the result could be some files in the tablespace unexpectedly containing no rows. Affected files would be those for which the system did not write WAL; see the wal_skip_threshold documentation. Before v13, a different set of conditions governed the writing of WAL; see v12's . Users may want to audit non-default tablespaces for unexpected short files. This bug could have truncated an index without affecting the associated table, and reindexing the index would fix that particular problem. This fixes the bug by making create_tablespace_directories() more like TablespaceCreateDbspace(). Back-patch to 9.6 (all supported versions). Reviewed by Robert Haas. Discussion: https://postgr.es/m/CA+TgmoaLO9ncuwvr2nN-J4VEP5XyAcy=zKiHxQzBbFRxxGxm0w@mail.gmail.com diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index a54239a..932e7ae 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -614,40 +614,36 @@ create_tablespace_directories(const char *location, const Oid tablespaceoid) location))); } - if (InRecovery) - { - /* - * Our theory for replaying a CREATE is to forcibly drop the target - * subdirectory if present, and then recreate it. This may be more - * work than needed, but it is simple to implement. - */ - if (stat(location_with_version_dir, &st) == 0 && S_ISDIR(st.st_mode)) - { - if (!rmtree(location_with_version_dir, true)) - /* If this failed, MakePGDirectory() below is going to error. */ - ereport(WARNING, - (errmsg("some useless files may be left behind in old database directory \"%s\"", - location_with_version_dir))); - } - } - /* * The creation of the version directory prevents more than one tablespace - * in a single location. + * in a single location. This imitates TablespaceCreateDbspace(), but it + * ignores concurrency and missing parent directories. The chmod() would + * have failed in the absence of a parent. pg_tablespace_spcname_index + * prevents concurrency. */ - if (MakePGDirectory(location_with_version_dir) < 0) + if (stat(location_with_version_dir, &st) < 0) { - if (errno == EEXIST) + if (errno != ENOENT) ereport(ERROR, - (errcode(ERRCODE_OBJECT_IN_USE), - errmsg("directory \"%s\" already in use as a tablespace", + (errcode_for_file_access(), + errmsg("could not stat directory \"%s\": %m", location_with_version_dir))); - else + else if (MakePGDirectory(location_with_version_dir) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", location_with_version_dir))); } + else if (!S_ISDIR(st.st_mode)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" exists but is not a directory", + location_with_version_dir))); + else if (!InRecovery) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("directory \"%s\" already in use as a tablespace", + location_with_version_dir))); /* * In recovery, remove old symlink, in case it points to the wrong place. diff --git a/src/test/recovery/t/018_wal_optimize.pl b/src/test/recovery/t/018_wal_optimize.pl index 4aa1bf8..47cbc95 100644 --- a/src/test/recovery/t/018_wal_optimize.pl +++ b/src/test/recovery/t/018_wal_optimize.pl @@ -14,7 +14,7 @@ use warnings; use PostgresNode; use TestLib; -use Test::More tests => 34; +use Test::More tests => 38; sub check_orphan_relfilenodes { @@ -59,8 +59,31 @@ wal_skip_threshold = 0 my $tablespace_dir = $node->basedir . '/tablespace_other'; mkdir($tablespace_dir); $tablespace_dir = TestLib::perl2host($tablespace_dir); - $node->safe_psql('postgres', - "CREATE TABLESPACE other LOCATION '$tablespace_dir';"); + my $result; + + # Test redo of CREATE TABLESPACE. + $node->safe_psql( + 'postgres', " + CREATE TABLE moved (id int); + INSERT INTO moved VALUES (1); + CREATE TABLESPACE other LOCATION '$tablespace_dir'; + BEGIN; + ALTER TABLE moved SET TABLESPACE other; + CREATE TABLE originated (id int); + INSERT INTO originated VALUES (1); + CREATE UNIQUE INDEX ON originated(id) TABLESPACE other; + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = $node->safe_psql('postgres', "SELECT count(*) FROM moved;"); + is($result, qq(1), "wal_level = $wal_level, CREATE+SET TABLESPACE"); + $result = $node->safe_psql( + 'postgres', " + INSERT INTO originated VALUES (1) ON CONFLICT (id) + DO UPDATE set id = originated.id + 1 + RETURNING id;"); + is($result, qq(2), + "wal_level = $wal_level, CREATE TABLESPACE, CREATE INDEX"); # Test direct truncation optimization. No tuples. $node->safe_psql( @@ -71,7 +94,7 @@ wal_skip_threshold = 0 COMMIT;"); $node->stop('immediate'); $node->start; - my $result = $node->safe_psql('postgres', "SELECT count(*) FROM trunc;"); + $result = $node->safe_psql('postgres', "SELECT count(*) FROM trunc;"); is($result, qq(0), "wal_level = $wal_level, TRUNCATE with empty table"); # Test truncation with inserted tuples within the same transaction.