From 1924e5dbd9a88ce0238814ec4e5a67e6f2eea02d Mon Sep 17 00:00:00 2001
From: Jelte Fennema-Nio <postgres@jeltef.nl>
Date: Wed, 4 Mar 2026 09:24:24 +0100
Subject: [PATCH v4 3/7] pgindent: Allow parallel pgindent runs

Running pgindent on the whole source tree can take a while. This adds a
--jobs/-j option that lets pgindent indent files in parallel, using one
worker per CPU by default. This speeds up a full pgindent run from
~7 seconds to 1 second on my machine.

The wins are especially large with future commits that integrate
perltidy into pgindent, because perltidy is much slower at formatting
than pg_bsd_indent. With those later commits a full pgindent run
(including perltidy) takes more than a minute on my machine without
parallelization, but only ~7 seconds when run in parallel.
---
 src/tools/pgindent/pgindent | 181 +++++++++++++++++++++++++-----------
 1 file changed, 129 insertions(+), 52 deletions(-)

diff --git a/src/tools/pgindent/pgindent b/src/tools/pgindent/pgindent
index e2e6f19678a..ca56758ffac 100755
--- a/src/tools/pgindent/pgindent
+++ b/src/tools/pgindent/pgindent
@@ -17,6 +17,7 @@ use File::Spec;
 use File::Temp;
 use IO::Handle;
 use Getopt::Long;
+use Fcntl qw(:flock);
 
 # By default Perl's SIGINT/SIGTERM kill the process without running
 # END blocks, so File::Temp never gets to clean up.  Converting the
@@ -42,10 +43,11 @@ my $indent_opts =
 
 my $devnull = File::Spec->devnull;
 
-my ($typedefs_file, $typedef_str, @excludes, $indent, $diff,
-	$check, $help, @commits,);
+my ($typedefs_file, $typedef_str, @excludes, $indent,
+	$diff, $check, $help, @commits, $jobs,);
 
 $help = 0;
+$jobs = 0;
 
 my %options = (
 	"help" => \$help,
@@ -55,9 +57,20 @@ my %options = (
 	"excludes=s" => \@excludes,
 	"indent=s" => \$indent,
 	"diff" => \$diff,
-	"check" => \$check,);
+	"check" => \$check,
+	"jobs|j=i" => \$jobs,);
 GetOptions(%options) || usage("bad command line argument");
 
+# --jobs=0 (the default) means one worker per detected CPU
+if ($jobs < 0)
+{
+	usage("--jobs must be a non-negative number");
+}
+elsif ($jobs == 0)
+{
+	$jobs = get_num_cpus();
+}
+
 usage() if $help;
 
 usage("Cannot use --commit with command line file list")
@@ -103,6 +116,19 @@ my %excluded = map { +"$_\n" => 1 } qw(
 # globals
 my @files;
 my $filtered_typedefs_fh;
+my $stdout_lock_fh;
+
+# Return the CPU count reported by the system, or 1 if it can't be found.
+sub get_num_cpus
+{
+	# nproc works on Linux and some BSDs, sysctl on macOS and FreeBSD.
+	foreach my $probe ('nproc', 'sysctl -n hw.ncpu')
+	{
+		chomp(my $count = `$probe 2>$devnull`);
+		return $count + 0 if ($? == 0 && $count =~ /^\d+$/ && $count > 0);
+	}
+	return 1;
+}
 
 sub check_indent
 {
@@ -383,6 +409,67 @@ sub discover_files
 	return @discovered;
 }
 
+# Indent one file.  Returns 2 if --check found a difference, 3 if
+# run_indent() produced no output, and 0 in all other cases.
+sub process_file
+{
+	my $source_filename = shift;
+
+	# ignore anything that's not a .c or .h file
+	return 0 unless $source_filename =~ /\.[ch]$/;
+
+	# don't try to indent a file that doesn't exist
+	unless (-f $source_filename)
+	{
+		warn "Could not find $source_filename";
+		return 0;
+	}
+	# Automatically ignore .c and .h files that correspond to a .y or .l
+	# file.  indent tends to get badly confused by Bison/flex output,
+	# and there's no value in indenting derived files anyway.
+	my $otherfile = $source_filename;
+	$otherfile =~ s/\.[ch]$/.y/;
+	return 0 if $otherfile ne $source_filename && -f $otherfile;
+	$otherfile =~ s/\.y$/.l/;
+	return 0 if $otherfile ne $source_filename && -f $otherfile;
+
+	my $source = read_source($source_filename);
+	my $orig_source = $source;
+	my $error_message = '';
+
+	$source = pre_indent($source);
+
+	$source = run_indent($source, \$error_message);
+	if ($source eq "")
+	{
+		print STDERR "Failure in $source_filename: " . $error_message . "\n";
+		return 3;
+	}
+
+	$source = post_indent($source);
+
+	return 0 if $source eq $orig_source;
+
+	if (!$diff && !$check)
+	{
+		write_source($source, $source_filename);
+		return 0;
+	}
+	if ($diff)
+	{
+		my $output = diff($source, $source_filename);
+		# A flock() lock belongs to the open file description, which forked
+		# children share; lock a handle opened by this process instead.
+		open(my $lock_fh, '>>', $stdout_lock_fh->filename)
+		  or die "could not open lock file: $!";
+		flock($lock_fh, LOCK_EX) or die "flock: $!";
+		print $output;
+		STDOUT->flush();
+		close($lock_fh) or die "could not close lock file: $!";
+	}
+	return $check ? 2 : 0;
+}
+
 sub usage
 {
 	my $message = shift;
@@ -398,6 +485,7 @@ Options:
 	--indent=PATH           path to pg_bsd_indent program
 	--diff                  show the changes that would be made
 	--check                 exit with status 2 if any changes would be made
+	--jobs=N, -j N          number of parallel workers (0 = num CPUs, default 0)
 The --excludes and --commit options can be given more than once.
 EOF
 	if ($help)
@@ -439,67 +527,56 @@ warn "No files to process" unless @files;
 # remove excluded files from the file list
 process_exclude();
 
-my %processed;
-my $status = 0;
+# Used by forked children to serialize diff output to STDOUT via flock().
+$stdout_lock_fh = new File::Temp(TEMPLATE => "pglockXXXXX");
 
-foreach my $source_filename (@files)
-{
-	# skip duplicates
-	next if $processed{$source_filename};
-	$processed{$source_filename} = 1;
+# deduplicate file list
+my %seen;
+@files = grep { !$seen{$_}++ } @files;
 
-	# ignore anything that's not a .c or .h file
-	next unless $source_filename =~ /\.[ch]$/;
+my $status = 0;
 
-	# don't try to indent a file that doesn't exist
-	unless (-f $source_filename)
+if ($jobs <= 1)
+{
+	foreach my $source_filename (@files)
 	{
-		warn "Could not find $source_filename";
-		next;
+		my $file_status = process_file($source_filename);
+		$status = $file_status if $file_status > $status;
+		last if $check && $status >= 2 && !$diff;
 	}
-	# Automatically ignore .c and .h files that correspond to a .y or .l
-	# file.  indent tends to get badly confused by Bison/flex output,
-	# and there's no value in indenting derived files anyway.
-	my $otherfile = $source_filename;
-	$otherfile =~ s/\.[ch]$/.y/;
-	next if $otherfile ne $source_filename && -f $otherfile;
-	$otherfile =~ s/\.y$/.l/;
-	next if $otherfile ne $source_filename && -f $otherfile;
-
-	my $source = read_source($source_filename);
-	my $orig_source = $source;
-	my $error_message = '';
-
-	$source = pre_indent($source);
+}
+else
+{
+	my %children;    # pid => 1
 
-	$source = run_indent($source, \$error_message);
-	if ($source eq "")
+	my $file_idx = 0;
+	while ($file_idx < scalar(@files) || %children)
 	{
-		print STDERR "Failure in $source_filename: " . $error_message . "\n";
-		$status = 3;
-		next;
-	}
+		# Fork new children up to $jobs limit
+		while ($file_idx < scalar(@files) && scalar(keys %children) < $jobs)
+		{
+			my $source_filename = $files[ $file_idx++ ];
 
-	$source = post_indent($source);
+			my $pid = fork();
+			die "fork failed: $!\n" unless defined $pid;
 
-	if ($source ne $orig_source)
-	{
-		if (!$diff && !$check)
-		{
-			write_source($source, $source_filename);
-		}
-		else
-		{
-			if ($diff)
+			if ($pid == 0)
 			{
-				print diff($source, $source_filename);
+				# child
+				my $child_status = process_file($source_filename);
+				exit $child_status;
 			}
 
-			if ($check)
-			{
-				$status ||= 2;
-				last unless $diff;
-			}
+			$children{$pid} = 1;
+		}
+
+		# Reap one child; death by signal counts as a failure (status 3)
+		my $pid = waitpid(-1, 0);
+		if ($pid > 0 && exists $children{$pid})
+		{
+			delete $children{$pid};
+			my $child_status = ($? & 127) ? 3 : $? >> 8;
+			$status = $child_status if $child_status > $status;
 		}
 	}
 }
-- 
2.53.0

