From 1f83b8a1419e9ccee69897f6e9e53cf7b193412b Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sun, 8 Mar 2020 22:52:14 -0500
Subject: [PATCH v27 11/11] Add recursion option in pg_ls_dir_files

Does this need a catversion bump?
---
 doc/src/sgml/func.sgml                       |  6 +-
 src/backend/catalog/system_views.sql         |  2 +-
 src/backend/utils/adt/genfile.c              | 78 ++++++++++++++++----
 src/bin/pg_rewind/libpq_source.c             | 22 ++----
 src/bin/pg_rewind/t/RewindTest.pm            |  5 +-
 src/include/catalog/pg_proc.dat              | 17 +++--
 src/test/regress/expected/misc_functions.out | 26 ++++++-
 src/test/regress/sql/misc_functions.sql      |  8 +-
 8 files changed, 120 insertions(+), 44 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 0a7083efac..34d939f77b 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -25800,7 +25800,8 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
         </indexterm>
         <function>pg_ls_dir_metadata</function> ( <parameter>dirname</parameter> <type>text</type>
         <optional>, <parameter>missing_ok</parameter> <type>boolean</type>,
-        <parameter>include_dot_dirs</parameter> <type>boolean</type> </optional> )
+        <parameter>include_dot_dirs</parameter> <type>boolean</type>,
+        <parameter>recurse</parameter> <type>boolean</type> </optional> )
         <returnvalue>setof record</returnvalue>
         ( <parameter>filename</parameter> <type>text</type>,
         <parameter>size</parameter> <type>bigint</type>,
@@ -25808,7 +25809,8 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
         <parameter>modification</parameter> <type>timestamp with time zone</type>,
         <parameter>change</parameter> <type>timestamp with time zone</type>,
         <parameter>creation</parameter> <type>timestamp with time zone</type>,
-        <parameter>type</parameter> <type>char</type> )
+        <parameter>type</parameter> <type>char</type>,
+        <parameter>path</parameter> <type>text</type> )
        </para>
        <para>
         For each file in the specified directory, list the file and its
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index fcd050b752..d915a3b342 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1607,7 +1607,7 @@ REVOKE EXECUTE ON FUNCTION pg_stat_file(text,boolean) FROM public;
 
 REVOKE EXECUTE ON FUNCTION pg_ls_dir(text) FROM public;
 REVOKE EXECUTE ON FUNCTION pg_ls_dir(text,boolean,boolean) FROM public;
-REVOKE EXECUTE ON FUNCTION pg_ls_dir_metadata(text,boolean,boolean) FROM public;
+REVOKE EXECUTE ON FUNCTION pg_ls_dir_metadata(text,boolean,boolean,boolean) FROM public;
 
 --
 -- We also set up some things as accessible to standard roles.
diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c
index ff31e38484..ecb5b9fda9 100644
--- a/src/backend/utils/adt/genfile.c
+++ b/src/backend/utils/adt/genfile.c
@@ -40,6 +40,8 @@ static char get_file_type(mode_t mode, const char *path);
 static void values_from_stat(struct stat *fst, const char *path, Datum *values,
 		bool *nulls);
 static Datum pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags);
+void pg_ls_dir_files_internal(const char *dirname, DIR *dirdesc,
+		Tuplestorestate *tupstore, TupleDesc tupdesc, int flags);
 
 #define	LS_DIR_TYPE					(1<<0) /* Show column: type */
 #define	LS_DIR_METADATA				(1<<1) /* Show columns: mtime, size */
@@ -48,6 +50,7 @@ static Datum pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags
 #define	LS_DIR_SKIP_HIDDEN			(1<<4) /* Do not show anything begining with . */
 #define	LS_DIR_SKIP_DIRS			(1<<5) /* Do not show directories */
 #define	LS_DIR_SKIP_SPECIAL			(1<<6) /* Do not show special file types */
+#define	LS_DIR_RECURSE				(1<<7) /* Recurse into subdirs */
 
 /* Shortcut for common behavior */
 #define LS_DIR_COMMON				(LS_DIR_SKIP_HIDDEN | LS_DIR_METADATA)
@@ -583,7 +586,6 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags)
 	TupleDesc	tupdesc;
 	Tuplestorestate *tupstore;
 	DIR		   *dirdesc;
-	struct dirent *de;
 	MemoryContext oldcontext;
 	TypeFuncClass	tuptype ;
 
@@ -593,9 +595,8 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags)
 	Assert(!(flags&LS_DIR_TYPE) || !(flags&LS_DIR_SKIP_DIRS));
 
 	/* check the optional arguments */
-	if (PG_NARGS() == 3)
-	{
-		if (!PG_ARGISNULL(1))
+	if (PG_NARGS() > 1 &&
+		!PG_ARGISNULL(1))
 		{
 			if (PG_GETARG_BOOL(1))
 				flags |= LS_DIR_MISSING_OK;
@@ -603,14 +604,30 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags)
 				flags &= ~LS_DIR_MISSING_OK;
 		}
 
-		if (!PG_ARGISNULL(2))
+	if (PG_NARGS() > 2 &&
+		!PG_ARGISNULL(2))
 		{
 			if (PG_GETARG_BOOL(2))
 				flags &= ~LS_DIR_SKIP_DOT_DIRS;
 			else
 				flags |= LS_DIR_SKIP_DOT_DIRS;
 		}
-	}
+
+	if (PG_NARGS() > 3 &&
+		!PG_ARGISNULL(3))
+		{
+			if (PG_GETARG_BOOL(3))
+				flags |= LS_DIR_RECURSE;
+			else
+				flags &= ~LS_DIR_RECURSE;
+		}
+
+	if ((flags & LS_DIR_RECURSE) != 0 &&
+			(flags & LS_DIR_SKIP_DOT_DIRS) == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_RECURSION), // ??
+				 errmsg("recursion requires skipping dot dirs")));
+
 
 	/* check to see if caller supports us returning a tuplestore */
 	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
@@ -662,10 +679,20 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags)
 		/* Otherwise, we can let ReadDir() throw the error */
 	}
 
-	while ((de = ReadDir(dirdesc, dir)) != NULL)
+	pg_ls_dir_files_internal(dir, dirdesc, tupstore, tupdesc, flags);
+	FreeDir(dirdesc);
+	return (Datum) 0;
+}
+
+void pg_ls_dir_files_internal(const char *dirname, DIR *dirdesc,
+		Tuplestorestate *tupstore, TupleDesc tupdesc, int flags)
+{
+	struct dirent *de;
+
+	while ((de = ReadDir(dirdesc, dirname)) != NULL)
 	{
-		Datum		values[7];
-		bool		nulls[7];
+		Datum		values[8];
+		bool		nulls[8];
 		char		path[MAXPGPATH * 2];
 		struct stat attrib;
 
@@ -681,7 +708,11 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags)
 			continue;
 
 		/* Get the file info */
-		snprintf(path, sizeof(path), "%s/%s", dir, de->d_name);
+		if (strcmp(dirname, ".") != 0)
+			snprintf(path, sizeof(path), "%s/%s", dirname, de->d_name);
+		else
+			snprintf(path, sizeof(path), "%s", de->d_name);
+
 		if (lstat(path, &attrib) < 0)
 		{
 			/* Ignore concurrently-deleted files, else complain */
@@ -706,14 +737,33 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags)
 
 		memset(nulls, false, sizeof(nulls));
 		values[0] = CStringGetTextDatum(de->d_name);
-		if (flags & LS_DIR_METADATA)
+		if ((flags & (LS_DIR_RECURSE|LS_DIR_METADATA)) != 0)
+		{
 			values_from_stat(&attrib, path, 1+values, 1+nulls);
 
+			/*
+			 * The path column is only really useful when recursing, but this
+			 * function cannot return different columns when recursing.
+			 * XXX: should this return dirname (nice since it's the original,
+			 * unprocessed input to this recursion) or path (nice since it's a
+			 * "cooked" value without leading/duplicate slashes)?
+			 */
+			values[7] = CStringGetTextDatum(path);
+		}
+
 		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
-	}
 
-	FreeDir(dirdesc);
-	return (Datum) 0;
+		/* Recurse? */
+		if ((flags & LS_DIR_RECURSE) != 0 &&
+			S_ISDIR(attrib.st_mode))
+		{
+			DIR *newdir = AllocateDir(path);
+			/* Failure handled by ReadDir */
+			pg_ls_dir_files_internal(path, newdir, tupstore, tupdesc, flags);
+			Assert(newdir != NULL);
+			FreeDir(newdir);
+		}
+	}
 }
 
 /* Function to return the list of files in the log directory */
diff --git a/src/bin/pg_rewind/libpq_source.c b/src/bin/pg_rewind/libpq_source.c
index ac794cf4eb..814d3de3fc 100644
--- a/src/bin/pg_rewind/libpq_source.c
+++ b/src/bin/pg_rewind/libpq_source.c
@@ -237,30 +237,18 @@ libpq_traverse_files(rewind_source *source, process_file_callback_t callback)
 	/*
 	 * Create a recursive directory listing of the whole data directory.
 	 *
-	 * The WITH RECURSIVE part does most of the work. The second part gets the
-	 * targets of the symlinks in pg_tblspc directory.
+	 * Join to pg_tablespace to get the targets of the symlinks in the
+	 * pg_tblspc directory.
 	 *
 	 * XXX: There is no backend function to get a symbolic link's target in
 	 * general, so if the admin has put any custom symbolic links in the data
 	 * directory, they won't be copied correctly.
 	 */
 	sql =
-		"WITH RECURSIVE files (path, filename, size, isdir) AS (\n"
-		"  SELECT '' AS path, filename, size, isdir FROM\n"
-		"  (SELECT pg_ls_dir('.', true, false) AS filename) AS fn,\n"
-		"        pg_stat_file(fn.filename, true) AS this\n"
-		"  UNION ALL\n"
-		"  SELECT parent.path || parent.filename || '/' AS path,\n"
-		"         fn, this.size, this.isdir\n"
-		"  FROM files AS parent,\n"
-		"       pg_ls_dir(parent.path || parent.filename, true, false) AS fn,\n"
-		"       pg_stat_file(parent.path || parent.filename || '/' || fn, true) AS this\n"
-		"       WHERE parent.isdir = 't'\n"
-		")\n"
-		"SELECT path || filename, size, isdir,\n"
+		"SELECT path, size, type='d' AS isdir,\n"
 		"       pg_tablespace_location(pg_tablespace.oid) AS link_target\n"
-		"FROM files\n"
-		"LEFT OUTER JOIN pg_tablespace ON files.path = 'pg_tblspc/'\n"
+		"FROM pg_ls_dir_metadata('.', true, false, true) files\n"
+		"LEFT OUTER JOIN pg_tablespace ON files.path = 'pg_tblspc'\n"
 		"                             AND oid::text = files.filename\n";
 	res = PQexec(conn, sql);
 
diff --git a/src/bin/pg_rewind/t/RewindTest.pm b/src/bin/pg_rewind/t/RewindTest.pm
index 41ed7d4b3b..d911b7de52 100644
--- a/src/bin/pg_rewind/t/RewindTest.pm
+++ b/src/bin/pg_rewind/t/RewindTest.pm
@@ -160,7 +160,10 @@ sub start_primary
 		GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text)
 		  TO rewind_user;
 		GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text, bigint, bigint, boolean)
-		  TO rewind_user;");
+		  TO rewind_user;
+		GRANT EXECUTE ON function pg_catalog.pg_ls_dir_metadata(text, bool, bool, bool)
+		  TO rewind_user;
+		");
 
 	#### Now run the test-specific parts to initialize the primary before setting
 	# up standby
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 89ce701654..d6ca02e2d0 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -11519,17 +11519,24 @@
   prosrc => 'pg_ls_tmpdir_1arg' },
 { oid => '8450', descr => 'list directory with metadata',
   proname => 'pg_ls_dir_metadata', procost => '10', prorows => '20', proretset => 't',
-  provolatile => 'v', prorettype => 'record', proargtypes => 'text bool bool',
-  proallargtypes => '{text,bool,bool,text,int8,timestamptz,timestamptz,timestamptz,timestamptz,char}', proargmodes => '{i,i,i,o,o,o,o,o,o,o}',
-  proargnames => '{dirname,missing_ok,include_dot_dirs,filename,size,access,modification,change,creation,type}',
+  provolatile => 'v', prorettype => 'record', proargtypes => 'text bool bool bool',
+  proallargtypes => '{text,bool,bool,bool,text,int8,timestamptz,timestamptz,timestamptz,timestamptz,char,text}', proargmodes => '{i,i,i,i,o,o,o,o,o,o,o,o}',
+  proargnames => '{dirname,missing_ok,include_dot_dirs,recurse,filename,size,access,modification,change,creation,type,path}',
   prosrc => 'pg_ls_dir_metadata' },
 { oid => '8451', descr => 'list directory with metadata',
   proname => 'pg_ls_dir_metadata', procost => '10', prorows => '20', proretset => 't',
   provolatile => 'v', prorettype => 'record', proargtypes => 'text',
-  proallargtypes => '{text,text,int8,timestamptz,timestamptz,timestamptz,timestamptz,char}', proargmodes => '{i,o,o,o,o,o,o,o}',
-  proargnames => '{dirname,filename,size,access,modification,change,creation,type}',
+  proallargtypes => '{text,text,int8,timestamptz,timestamptz,timestamptz,timestamptz,char,text}', proargmodes => '{i,o,o,o,o,o,o,o,o}',
+  proargnames => '{dirname,filename,size,access,modification,change,creation,type,path}',
   prosrc => 'pg_ls_dir_metadata_1arg' },
 
+{ oid => '9981', descr => 'list all files in a directory recursively',
+  proname => 'pg_ls_dir_recurse_sql', prorows => '10000', proretset => 't',
+  provolatile => 'v', prorettype => 'record', proargtypes => 'text',
+  proallargtypes => '{text,text,text,int8,timestamptz,timestamptz,timestamptz,timestamptz,char}',
+  proargnames => '{dirname,path,filename,size,access,modification,change,creation,type}', proargmodes => '{i,o,o,o,o,o,o,o,o}',
+  prolang => 'sql', prosrc => "with recursive ls as (select dirname as path, * from pg_ls_dir_metadata(dirname, false, false) union all select coalesce(nullif(parent.path,'.')||'/','')||parent.filename, a.filename, a.size, a.access, a.modification, a.change, a.creation, a.type from ls as parent, lateral pg_ls_dir_metadata(parent.path||'/'||parent.filename, false, false) as a where parent.type='d') select * from ls" },
+
 # hash partitioning constraint function
 { oid => '5028', descr => 'hash partition CHECK constraint',
   proname => 'satisfies_hash_partition', provariadic => 'any',
diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out
index 349a549744..3cb27fc997 100644
--- a/src/test/regress/expected/misc_functions.out
+++ b/src/test/regress/expected/misc_functions.out
@@ -251,17 +251,37 @@ select filename, type from pg_ls_dir_metadata('.') where filename='.';
  .        | d
 (1 row)
 
-select filename, type from pg_ls_dir_metadata('.', false, false) where filename='.'; -- include_dot_dirs=false
+select filename, type from pg_ls_dir_metadata('.', false, false, false) where filename='.'; -- include_dot_dirs=false
  filename | type 
 ----------+------
 (0 rows)
 
 -- Check that expected columns are present
 select * from pg_ls_dir_metadata('.') limit 0;
- filename | size | access | modification | change | creation | type 
-----------+------+--------+--------------+--------+----------+------
+ filename | size | access | modification | change | creation | type | path 
+----------+------+--------+--------------+--------+----------+------+------
 (0 rows)
 
+-- Exercise recursion
+select path, filename, type from pg_ls_dir_metadata('.', true, false, true) where
+path in ('base', 'base/pgsql_tmp', 'global', 'global/pg_control', 'global/pg_filenode.map', 'PG_VERSION', 'pg_multixact', 'pg_multixact/members', 'pg_multixact/offsets', 'pg_wal', 'pg_wal/archive_status')
+-- (type='d' or path~'^(global/.*|PG_VERSION|postmaster\.opts|postmaster\.pid|pg_logical/replorigin_checkpoint)$') and filename!~'[0-9]'
+order by path collate "C", filename collate "C";
+          path          |    filename     | type 
+------------------------+-----------------+------
+ PG_VERSION             | PG_VERSION      | -
+ base                   | base            | d
+ base/pgsql_tmp         | pgsql_tmp       | d
+ global                 | global          | d
+ global/pg_control      | pg_control      | -
+ global/pg_filenode.map | pg_filenode.map | -
+ pg_multixact           | pg_multixact    | d
+ pg_multixact/members   | members         | d
+ pg_multixact/offsets   | offsets         | d
+ pg_wal                 | pg_wal          | d
+ pg_wal/archive_status  | archive_status  | d
+(11 rows)
+
 --
 -- Test adding a support function to a subject function
 --
diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql
index 55bed7b9e0..d8c9194719 100644
--- a/src/test/regress/sql/misc_functions.sql
+++ b/src/test/regress/sql/misc_functions.sql
@@ -83,11 +83,17 @@ select * from pg_ls_tmpdir() where name='Does not exist';
 
 select filename, type from pg_ls_dir_metadata('.') where filename='.';
 
-select filename, type from pg_ls_dir_metadata('.', false, false) where filename='.'; -- include_dot_dirs=false
+select filename, type from pg_ls_dir_metadata('.', false, false, false) where filename='.'; -- include_dot_dirs=false
 
 -- Check that expected columns are present
 select * from pg_ls_dir_metadata('.') limit 0;
 
+-- Exercise recursion
+select path, filename, type from pg_ls_dir_metadata('.', true, false, true) where
+path in ('base', 'base/pgsql_tmp', 'global', 'global/pg_control', 'global/pg_filenode.map', 'PG_VERSION', 'pg_multixact', 'pg_multixact/members', 'pg_multixact/offsets', 'pg_wal', 'pg_wal/archive_status')
+-- (type='d' or path~'^(global/.*|PG_VERSION|postmaster\.opts|postmaster\.pid|pg_logical/replorigin_checkpoint)$') and filename!~'[0-9]'
+order by path collate "C", filename collate "C";
+
 --
 -- Test adding a support function to a subject function
 --
-- 
2.17.0

