From eee345a1685a04eeb0fedf3ba9ad0fc6fe6e3d81 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sun, 5 Dec 2021 23:22:04 -0600
Subject: [PATCH 1/2] check_guc: fix absurd number of false positives

Avoid false positives;
Avoid various assumptions;
Avoid list of exceptions;
Simplify shell/awk/sed/grep;

This requires GNU awk for RS as a regex.
---
 src/backend/utils/misc/check_guc | 69 +++++---------------------------
 1 file changed, 10 insertions(+), 59 deletions(-)

diff --git a/src/backend/utils/misc/check_guc b/src/backend/utils/misc/check_guc
index b171ef0e4ff..323ca13191b 100755
--- a/src/backend/utils/misc/check_guc
+++ b/src/backend/utils/misc/check_guc
@@ -1,78 +1,29 @@
-#!/bin/sh
+#! /bin/sh
+set -e
 
-## currently, this script makes a lot of assumptions:
+## this script makes some assumptions about the formatting of files it parses.
 ## in postgresql.conf.sample:
 ##   1) the valid config settings may be preceded by a '#', but NOT '# '
 ##      (we use this to skip comments)
-##   2) the valid config settings will be followed immediately by  ' ='
-##      (at least one space preceding the '=')
-## in guc.c:
-##   3) the options have PGC_ on the same line as the option
-##   4) the options have '{' on the same line as the option
-
-##  Problems
-## 1) Don't know what to do with TRANSACTION ISOLATION LEVEL
-
-## if an option is valid but shows up in only one file (guc.c but not
-## postgresql.conf.sample), it should be listed here so that it
-## can be ignored
-INTENTIONALLY_NOT_INCLUDED="debug_deadlocks in_hot_standby \
-is_superuser lc_collate lc_ctype lc_messages lc_monetary lc_numeric lc_time \
-pre_auth_delay role seed server_encoding server_version server_version_num \
-session_authorization trace_lock_oidmin trace_lock_table trace_locks trace_lwlocks \
-trace_notify trace_userlocks transaction_isolation transaction_read_only \
-zero_damaged_pages"
 
 ### What options are listed in postgresql.conf.sample, but don't appear
 ### in guc.c?
 
-# grab everything that looks like a setting and convert it to lower case
-SETTINGS=`grep ' =' postgresql.conf.sample |
-grep -v '^# ' | # strip comments
-sed -e 's/^#//' |
-awk '{print $1}'`
-
-SETTINGS=`echo "$SETTINGS" | tr 'A-Z' 'a-z'`
+# grab everything that looks like a setting
+SETTINGS=`sed '/^#[[:alnum:]]/!d; s/^#//; s/ =.*//; /^include/d' postgresql.conf.sample`
 
 for i in $SETTINGS ; do
-  hidden=0
   ## it sure would be nice to replace this with an sql "not in" statement
-  ## it doesn't seem to make sense to have things in .sample and not in guc.c
-#  for hidethis in $INTENTIONALLY_NOT_INCLUDED ; do
-#    if [ "$hidethis" = "$i" ] ; then
-#      hidden=1
-#    fi
-#  done
-  if [ "$hidden" -eq 0 ] ; then
-    grep -i '"'$i'"' guc.c > /dev/null
-    if [ $? -ne 0 ] ; then
-      echo "$i seems to be missing from guc.c";
-    fi;
-  fi
+  grep -i "\"$i\"" guc.c >/dev/null ||
+    echo "$i seems to be missing from guc.c";
 done
 
 ### What options are listed in guc.c, but don't appear
 ### in postgresql.conf.sample?
 
 # grab everything that looks like a setting and convert it to lower case
-
-SETTINGS=`grep '{.* PGC_' guc.c | awk '{print $1}' | \
-          sed -e 's/{//g' -e 's/"//g' -e 's/,//'`
-
-SETTINGS=`echo "$SETTINGS" | tr 'A-Z' 'a-z'`
-
+SETTINGS=`gawk -F '[",]' 'BEGIN{RS="\n\t\\\\{\n"} /",[[:space:]]*PGC_.*.*gettext_noop/ && !/NOT_IN_SAMPLE/{print tolower($2)}' guc.c`
 for i in $SETTINGS ; do
-  hidden=0
-  ## it sure would be nice to replace this with an sql "not in" statement
-  for hidethis in $INTENTIONALLY_NOT_INCLUDED ; do
-    if [ "$hidethis" = "$i" ] ; then
-      hidden=1
-    fi
-  done
-  if [ "$hidden" -eq 0 ] ; then
-    grep -i '#'$i' ' postgresql.conf.sample > /dev/null
-    if [ $? -ne 0 ] ; then
-      echo "$i seems to be missing from postgresql.conf.sample";
-    fi
-  fi
+  grep "#$i " postgresql.conf.sample >/dev/null ||
+    echo "$i seems to be missing from postgresql.conf.sample";
 done
-- 
2.17.1

