From 7c075484c444d4b10ea0d5121130544d7e0e5b2c Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sun, 8 Mar 2020 22:52:14 -0500
Subject: [PATCH v32 11/11] Add recursion option to pg_ls_dir_files

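This changes pg_proc.dat (pg_ls_dir_metadata gains a "recurse" argument and a
"path" output column), so catversion presumably needs to be bumped?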
---
 doc/src/sgml/func.sgml                       |  6 +-
 src/backend/catalog/system_functions.sql     |  2 +-
 src/backend/utils/adt/genfile.c              | 78 ++++++++++++++++----
 src/bin/pg_rewind/libpq_source.c             | 22 ++----
 src/bin/pg_rewind/t/RewindTest.pm            |  5 +-
 src/include/catalog/pg_proc.dat              | 16 ++--
 src/test/regress/expected/misc_functions.out | 26 ++++++-
 src/test/regress/sql/misc_functions.sql      |  8 +-
 8 files changed, 119 insertions(+), 44 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 5c517ed6347..4cb8c0456fe 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -25952,7 +25952,8 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
         </indexterm>
         <function>pg_ls_dir_metadata</function> ( <parameter>dirname</parameter> <type>text</type>
         <optional>, <parameter>missing_ok</parameter> <type>boolean</type>,
-        <parameter>include_dot_dirs</parameter> <type>boolean</type> </optional> )
+        <parameter>include_dot_dirs</parameter> <type>boolean</type>,
+        <parameter>recurse</parameter> <type>boolean</type> </optional> )
         <returnvalue>setof record</returnvalue>
         ( <parameter>filename</parameter> <type>text</type>,
         <parameter>size</parameter> <type>bigint</type>,
@@ -25960,7 +25961,8 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
         <parameter>modification</parameter> <type>timestamp with time zone</type>,
         <parameter>change</parameter> <type>timestamp with time zone</type>,
         <parameter>creation</parameter> <type>timestamp with time zone</type>,
-        <parameter>type</parameter> <type>char</type> )
+        <parameter>type</parameter> <type>char</type>,
+        <parameter>path</parameter> <type>text</type> )
        </para>
        <para>
         For each file in the specified directory, list the file and its
diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql
index 5eb6cc8572c..69dbfa38942 100644
--- a/src/backend/catalog/system_functions.sql
+++ b/src/backend/catalog/system_functions.sql
@@ -702,7 +702,7 @@ REVOKE EXECUTE ON FUNCTION pg_stat_file(text,boolean) FROM public;
 REVOKE EXECUTE ON FUNCTION pg_ls_dir(text) FROM public;
 
 REVOKE EXECUTE ON FUNCTION pg_ls_dir(text,boolean,boolean) FROM public;
-REVOKE EXECUTE ON FUNCTION pg_ls_dir_metadata(text,boolean,boolean) FROM public;
+REVOKE EXECUTE ON FUNCTION pg_ls_dir_metadata(text,boolean,boolean,boolean) FROM public;
 
 REVOKE EXECUTE ON FUNCTION pg_log_backend_memory_contexts(integer) FROM PUBLIC;
 
diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c
index 7171357f442..8eb7cb896e4 100644
--- a/src/backend/utils/adt/genfile.c
+++ b/src/backend/utils/adt/genfile.c
@@ -41,6 +41,8 @@ static char get_file_type(mode_t mode, const char *path);
 static void values_from_stat(struct stat *fst, const char *path, Datum *values,
 		bool *nulls);
 static Datum pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags);
+void pg_ls_dir_files_internal(const char *dirname, DIR *dirdesc,
+		Tuplestorestate *tupstore, TupleDesc tupdesc, int flags);
 
 #define	LS_DIR_TYPE					(1<<0) /* Show column: type */
 #define	LS_DIR_METADATA				(1<<1) /* Show columns: mtime, size */
@@ -49,6 +51,7 @@ static Datum pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags
 #define	LS_DIR_SKIP_HIDDEN			(1<<4) /* Do not show anything begining with . */
 #define	LS_DIR_SKIP_DIRS			(1<<5) /* Do not show directories */
 #define	LS_DIR_SKIP_SPECIAL			(1<<6) /* Do not show special file types */
+#define	LS_DIR_RECURSE				(1<<7) /* Recurse into subdirs */
 
 /* Shortcut for common behavior */
 #define LS_DIR_COMMON				(LS_DIR_SKIP_HIDDEN | LS_DIR_METADATA)
@@ -583,7 +586,6 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags)
 	TupleDesc	tupdesc;
 	Tuplestorestate *tupstore;
 	DIR		   *dirdesc;
-	struct dirent *de;
 	MemoryContext oldcontext;
 	TypeFuncClass	tuptype ;
 
@@ -593,9 +595,8 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags)
 	Assert(!(flags&LS_DIR_TYPE) || !(flags&LS_DIR_SKIP_DIRS));
 
 	/* check the optional arguments */
-	if (PG_NARGS() == 3)
-	{
-		if (!PG_ARGISNULL(1))
+	if (PG_NARGS() > 1 &&
+		!PG_ARGISNULL(1))
 		{
 			if (PG_GETARG_BOOL(1))
 				flags |= LS_DIR_MISSING_OK;
@@ -603,14 +604,30 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags)
 				flags &= ~LS_DIR_MISSING_OK;
 		}
 
-		if (!PG_ARGISNULL(2))
+	if (PG_NARGS() > 2 &&
+		!PG_ARGISNULL(2))
 		{
 			if (PG_GETARG_BOOL(2))
 				flags &= ~LS_DIR_SKIP_DOT_DIRS;
 			else
 				flags |= LS_DIR_SKIP_DOT_DIRS;
 		}
-	}
+
+	if (PG_NARGS() > 3 &&
+		!PG_ARGISNULL(3))
+		{
+			if (PG_GETARG_BOOL(3))
+				flags |= LS_DIR_RECURSE;
+			else
+				flags &= ~LS_DIR_RECURSE;
+		}
+
+	if ((flags & LS_DIR_RECURSE) != 0 &&
+			(flags & LS_DIR_SKIP_DOT_DIRS) == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_RECURSION), // ??
+				 errmsg("recursion requires skipping dot dirs")));
+
 
 	/* check to see if caller supports us returning a tuplestore */
 	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
@@ -662,10 +679,20 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags)
 		/* Otherwise, we can let ReadDir() throw the error */
 	}
 
-	while ((de = ReadDir(dirdesc, dir)) != NULL)
+	pg_ls_dir_files_internal(dir, dirdesc, tupstore, tupdesc, flags);
+	FreeDir(dirdesc);
+	return (Datum) 0;
+}
+
+void pg_ls_dir_files_internal(const char *dirname, DIR *dirdesc,
+		Tuplestorestate *tupstore, TupleDesc tupdesc, int flags)
+{
+	struct dirent *de;
+
+	while ((de = ReadDir(dirdesc, dirname)) != NULL)
 	{
-		Datum		values[7];
-		bool		nulls[7];
+		Datum		values[8];
+		bool		nulls[8];
 		char		path[MAXPGPATH * 2];
 		struct stat attrib;
 
@@ -681,7 +708,11 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags)
 			continue;
 
 		/* Get the file info */
-		snprintf(path, sizeof(path), "%s/%s", dir, de->d_name);
+		if (strcmp(dirname, ".") != 0)
+			snprintf(path, sizeof(path), "%s/%s", dirname, de->d_name);
+		else
+			snprintf(path, sizeof(path), "%s", de->d_name);
+
 		if (lstat(path, &attrib) < 0)
 		{
 			/* Ignore concurrently-deleted files, else complain */
@@ -706,14 +737,33 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags)
 
 		memset(nulls, false, sizeof(nulls));
 		values[0] = CStringGetTextDatum(de->d_name);
-		if (flags & LS_DIR_METADATA)
+		if ((flags & (LS_DIR_RECURSE|LS_DIR_METADATA)) != 0)
+		{
 			values_from_stat(&attrib, path, 1+values, 1+nulls);
 
+			/*
+			 * The path column is only really useful when recursing, but this
+			 * function can't return a different set of columns in that case.
+			 * XXX: should this return dirname (nice since it's the original,
+			 * unprocessed input to this recursion) or path (nice since it's
+			 * a "cooked" value without leading/duplicate slashes)?
+			 */
+			values[7] = CStringGetTextDatum(path);
+		}
+
 		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
-	}
 
-	FreeDir(dirdesc);
-	return (Datum) 0;
+		/* Recurse? */
+		if ((flags & LS_DIR_RECURSE) != 0 &&
+			S_ISDIR(attrib.st_mode))
+		{
+			DIR *newdir = AllocateDir(path);
+			/* If AllocateDir() failed, ReadDir() will throw the error */
+			pg_ls_dir_files_internal(path, newdir, tupstore, tupdesc, flags);
+			Assert(newdir != NULL);
+			FreeDir(newdir);
+		}
+	}
 }
 
 /* Function to return the list of files in the log directory */
diff --git a/src/bin/pg_rewind/libpq_source.c b/src/bin/pg_rewind/libpq_source.c
index 8e0783fcef3..e6e893eda52 100644
--- a/src/bin/pg_rewind/libpq_source.c
+++ b/src/bin/pg_rewind/libpq_source.c
@@ -237,30 +237,18 @@ libpq_traverse_files(rewind_source *source, process_file_callback_t callback)
 	/*
 	 * Create a recursive directory listing of the whole data directory.
 	 *
-	 * The WITH RECURSIVE part does most of the work. The second part gets the
-	 * targets of the symlinks in pg_tblspc directory.
+	 * Join to pg_tablespace to get the targets of the symlinks in
+	 * pg_tblspc directory.
 	 *
 	 * XXX: There is no backend function to get a symbolic link's target in
 	 * general, so if the admin has put any custom symbolic links in the data
 	 * directory, they won't be copied correctly.
 	 */
 	sql =
-		"WITH RECURSIVE files (path, filename, size, isdir) AS (\n"
-		"  SELECT '' AS path, filename, size, isdir FROM\n"
-		"  (SELECT pg_ls_dir('.', true, false) AS filename) AS fn,\n"
-		"        pg_stat_file(fn.filename, true) AS this\n"
-		"  UNION ALL\n"
-		"  SELECT parent.path || parent.filename || '/' AS path,\n"
-		"         fn, this.size, this.isdir\n"
-		"  FROM files AS parent,\n"
-		"       pg_ls_dir(parent.path || parent.filename, true, false) AS fn,\n"
-		"       pg_stat_file(parent.path || parent.filename || '/' || fn, true) AS this\n"
-		"       WHERE parent.isdir = 't'\n"
-		")\n"
-		"SELECT path || filename, size, isdir,\n"
+		"SELECT path, size, type='d' AS isdir,\n"
 		"       pg_tablespace_location(pg_tablespace.oid) AS link_target\n"
-		"FROM files\n"
-		"LEFT OUTER JOIN pg_tablespace ON files.path = 'pg_tblspc/'\n"
+		"FROM pg_ls_dir_metadata('.', true, false, true) files\n"
+		"LEFT OUTER JOIN pg_tablespace ON files.path = 'pg_tblspc'\n"
 		"                             AND oid::text = files.filename\n";
 	res = PQexec(conn, sql);
 
diff --git a/src/bin/pg_rewind/t/RewindTest.pm b/src/bin/pg_rewind/t/RewindTest.pm
index 5546ce456cd..90c38425b5b 100644
--- a/src/bin/pg_rewind/t/RewindTest.pm
+++ b/src/bin/pg_rewind/t/RewindTest.pm
@@ -163,7 +163,10 @@ sub start_primary
 		GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text)
 		  TO rewind_user;
 		GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text, bigint, bigint, boolean)
-		  TO rewind_user;");
+		  TO rewind_user;
+		GRANT EXECUTE ON function pg_catalog.pg_ls_dir_metadata(text, bool, bool, bool)
+		  TO rewind_user;
+		");
 
 	#### Now run the test-specific parts to initialize the primary before setting
 	# up standby
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 998f5164170..02bd2d22598 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -11647,16 +11647,22 @@
   prosrc => 'pg_ls_replslotdir' },
 { oid => '8450', descr => 'list directory with metadata',
   proname => 'pg_ls_dir_metadata', procost => '10', prorows => '20', proretset => 't',
-  provolatile => 'v', prorettype => 'record', proargtypes => 'text bool bool',
-  proallargtypes => '{text,bool,bool,text,int8,timestamptz,timestamptz,timestamptz,timestamptz,char}', proargmodes => '{i,i,i,o,o,o,o,o,o,o}',
-  proargnames => '{dirname,missing_ok,include_dot_dirs,filename,size,access,modification,change,creation,type}',
+  provolatile => 'v', prorettype => 'record', proargtypes => 'text bool bool bool',
+  proallargtypes => '{text,bool,bool,bool,text,int8,timestamptz,timestamptz,timestamptz,timestamptz,char,text}', proargmodes => '{i,i,i,i,o,o,o,o,o,o,o,o}',
+  proargnames => '{dirname,missing_ok,include_dot_dirs,recurse,filename,size,access,modification,change,creation,type,path}',
   prosrc => 'pg_ls_dir_metadata' },
 { oid => '8451', descr => 'list directory with metadata',
   proname => 'pg_ls_dir_metadata', procost => '10', prorows => '20', proretset => 't',
   provolatile => 'v', prorettype => 'record', proargtypes => 'text',
-  proallargtypes => '{text,text,int8,timestamptz,timestamptz,timestamptz,timestamptz,char}', proargmodes => '{i,o,o,o,o,o,o,o}',
-  proargnames => '{dirname,filename,size,access,modification,change,creation,type}',
+  proallargtypes => '{text,text,int8,timestamptz,timestamptz,timestamptz,timestamptz,char,text}', proargmodes => '{i,o,o,o,o,o,o,o,o}',
+  proargnames => '{dirname,filename,size,access,modification,change,creation,type,path}',
   prosrc => 'pg_ls_dir_metadata_1arg' },
+{ oid => '8449', descr => 'list all files in a directory recursively',
+  proname => 'pg_ls_dir_recurse_sql', prorows => '10000', proretset => 't',
+  provolatile => 'v', prorettype => 'record', proargtypes => 'text',
+  proallargtypes => '{text,text,text,int8,timestamptz,timestamptz,timestamptz,timestamptz,char}',
+  proargnames => '{dirname,path,filename,size,access,modification,change,creation,type}', proargmodes => '{i,o,o,o,o,o,o,o,o}',
+  prolang => 'sql', prosrc => "with recursive ls as (select dirname as path, * from pg_ls_dir_metadata(dirname, false, false) union all select coalesce(nullif(parent.path,'.')||'/','')||parent.filename, a.filename, a.size, a.access, a.modification, a.change, a.creation, a.type from ls as parent, lateral pg_ls_dir_metadata(parent.path||'/'||parent.filename, false, false) as a where parent.type='d') select * from ls" },
 
 # hash partitioning constraint function
 { oid => '5028', descr => 'hash partition CHECK constraint',
diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out
index 1a11661d9fe..fc63206bb6f 100644
--- a/src/test/regress/expected/misc_functions.out
+++ b/src/test/regress/expected/misc_functions.out
@@ -280,17 +280,37 @@ select filename, type from pg_ls_dir_metadata('.') where filename='.';
  .        | d
 (1 row)
 
-select filename, type from pg_ls_dir_metadata('.', false, false) where filename='.'; -- include_dot_dirs=false
+select filename, type from pg_ls_dir_metadata('.', false, false, false) where filename='.'; -- include_dot_dirs=false
  filename | type 
 ----------+------
 (0 rows)
 
 -- Check that expected columns are present
 select * from pg_ls_dir_metadata('.') limit 0;
- filename | size | access | modification | change | creation | type 
-----------+------+--------+--------------+--------+----------+------
+ filename | size | access | modification | change | creation | type | path 
+----------+------+--------+--------------+--------+----------+------+------
 (0 rows)
 
+-- Exercise recursion
+select path, filename, type from pg_ls_dir_metadata('.', true, false, true) where
+path in ('base', 'base/pgsql_tmp', 'global', 'global/pg_control', 'global/pg_filenode.map', 'PG_VERSION', 'pg_multixact', 'pg_multixact/members', 'pg_multixact/offsets', 'pg_wal', 'pg_wal/archive_status')
+-- (type='d' or path~'^(global/.*|PG_VERSION|postmaster\.opts|postmaster\.pid|pg_logical/replorigin_checkpoint)$') and filename!~'[0-9]'
+order by path collate "C", filename collate "C";
+          path          |    filename     | type 
+------------------------+-----------------+------
+ PG_VERSION             | PG_VERSION      | -
+ base                   | base            | d
+ base/pgsql_tmp         | pgsql_tmp       | d
+ global                 | global          | d
+ global/pg_control      | pg_control      | -
+ global/pg_filenode.map | pg_filenode.map | -
+ pg_multixact           | pg_multixact    | d
+ pg_multixact/members   | members         | d
+ pg_multixact/offsets   | offsets         | d
+ pg_wal                 | pg_wal          | d
+ pg_wal/archive_status  | archive_status  | d
+(11 rows)
+
 --
 -- Test replication slot directory functions
 --
diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql
index a5828414531..54e79bf176d 100644
--- a/src/test/regress/sql/misc_functions.sql
+++ b/src/test/regress/sql/misc_functions.sql
@@ -105,11 +105,17 @@ select * from pg_ls_tmpdir() where name='Does not exist';
 
 select filename, type from pg_ls_dir_metadata('.') where filename='.';
 
-select filename, type from pg_ls_dir_metadata('.', false, false) where filename='.'; -- include_dot_dirs=false
+select filename, type from pg_ls_dir_metadata('.', false, false, false) where filename='.'; -- include_dot_dirs=false
 
 -- Check that expected columns are present
 select * from pg_ls_dir_metadata('.') limit 0;
 
+-- Exercise recursion
+select path, filename, type from pg_ls_dir_metadata('.', true, false, true) where
+path in ('base', 'base/pgsql_tmp', 'global', 'global/pg_control', 'global/pg_filenode.map', 'PG_VERSION', 'pg_multixact', 'pg_multixact/members', 'pg_multixact/offsets', 'pg_wal', 'pg_wal/archive_status')
+-- (type='d' or path~'^(global/.*|PG_VERSION|postmaster\.opts|postmaster\.pid|pg_logical/replorigin_checkpoint)$') and filename!~'[0-9]'
+order by path collate "C", filename collate "C";
+
 --
 -- Test replication slot directory functions
 --
-- 
2.17.0

