From b21850b49a9dd9f4a60eb9c243ba64be61e7e9c0 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Mon, 3 Oct 2022 18:26:24 -0700
Subject: [PATCH v18 01/22] meson: docs: Add xml{lint,proc} wrapper to collect
 dependencies

meson/ninja do not support specifying dependencies via globs (as those make it
significantly more expensive to check the current build state). Instead,
targets should emit dependency information when they run, which can then be
cheaply re-checked during future builds.

To handle xmllint and xsltproc invocations in the docs, add and use a wrapper
that uses --load-trace to collect dependency information.

Author: Nazir Bilal Yavuz <byavuz81@gmail.com>
Author: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/c5736f70-bb6d-8d25-e35c-e3d886e4e905@enterprisedb.com
---
 doc/src/sgml/meson.build          | 41 ++++++++++++++++-------
 doc/src/sgml/xmltools_dep_wrapper | 54 +++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 11 deletions(-)
 create mode 100644 doc/src/sgml/xmltools_dep_wrapper

diff --git a/doc/src/sgml/meson.build b/doc/src/sgml/meson.build
index ba2a261e7a4..65fd6131344 100644
--- a/doc/src/sgml/meson.build
+++ b/doc/src/sgml/meson.build
@@ -2,7 +2,7 @@ docs = []
 alldocs = []
 doc_generated = []
 
-xmllint = find_program(get_option('XMLLINT'), native: true, required: false)
+xmllint_bin = find_program(get_option('XMLLINT'), native: true, required: false)
 
 
 version_sgml = configure_file(
@@ -60,14 +60,23 @@ doc_generated += custom_target('keywords-table.sgml',
 )
 
 # For everything else we need at least xmllint
-if not xmllint.found()
+if not xmllint_bin.found()
   subdir_done()
 endif
 
 pandoc = find_program('pandoc', native: true, required: false)
-xsltproc = find_program(get_option('XSLTPROC'), native: true, required: false)
+xsltproc_bin = find_program(get_option('XSLTPROC'), native: true, required: false)
 fop = find_program('fop', native: true, required: false)
 
+xmltools_wrapper = [
+  python, files('xmltools_dep_wrapper'),
+  '--targetname', '@OUTPUT@', '--depfile', '@DEPFILE@'
+]
+
+xmllint = xmltools_wrapper + [
+  '--tool', xmllint_bin, '--',
+]
+
 # Run validation only once, common to all subsequent targets.  While
 # we're at it, also resolve all entities (that is, copy all included
 # files into one big file).  This helps tools that don't understand
@@ -75,6 +84,7 @@ fop = find_program('fop', native: true, required: false)
 postgres_full_xml = custom_target('postgres-full.xml',
   input: 'postgres.sgml',
   output: 'postgres-full.xml',
+  depfile: 'postgres-full.xml.d',
   command: [xmllint, '--noent', '--valid', '--path', '@OUTDIR@', '-o', '@OUTPUT@', '@INPUT@'],
   depends: doc_generated,
   build_by_default: false,
@@ -86,18 +96,20 @@ alldocs += postgres_full_xml
 #
 # Full documentation as html, text
 #
-if xsltproc.found()
+if xsltproc_bin.found()
   xsltproc_flags = [
     '--stringparam', 'pg.version', pg_version,
     '--param', 'website.stylesheet', '1'
   ]
 
+  xsltproc = xmltools_wrapper + [
+    '--tool', xsltproc_bin, '--',
+  ]
 
-  # FIXME: Should use a wrapper around xsltproc --load-trace to compute a
-  # depfile
   html = custom_target('html',
     input: ['stylesheet.xsl', postgres_full_xml],
     output: 'html',
+    depfile: 'html.d',
     depends: doc_generated,
     command: [xsltproc, '-o', '@OUTDIR@/', xsltproc_flags, '@INPUT@'],
     build_by_default: false,
@@ -110,6 +122,7 @@ if xsltproc.found()
   html_help = custom_target('html_help',
     input: ['stylesheet-hh.xsl', postgres_full_xml],
     output: 'htmlhelp',
+    depfile: 'htmlhelp.d',
     depends: doc_generated,
     command: [xsltproc, '--path', '@OUTDIR@', '-o', '@OUTDIR@/', xsltproc_flags, '@INPUT@'],
     build_by_default: false,
@@ -121,6 +134,7 @@ if xsltproc.found()
   postgres_html = custom_target('postgres.html',
     input: ['stylesheet-html-nochunk.xsl', postgres_full_xml],
     output: 'postgres.html',
+    depfile: 'postgres.html.d',
     depends: doc_generated,
     command: [xsltproc, '--path', '@OUTDIR@', '-o', '@OUTPUT@', xsltproc_flags, '@INPUT@'],
     build_by_default: false,
@@ -144,10 +158,11 @@ endif
 #
 # INSTALL in html, text
 #
-if xsltproc.found()
+if xsltproc_bin.found()
   install_xml = custom_target('INSTALL.xml',
     input: ['standalone-profile.xsl', 'standalone-install.xml'],
     output: 'INSTALL.xml',
+    depfile: 'INSTALL.xml.d',
     depends: doc_generated + [postgres_full_xml],
     command: [xsltproc, '--path', '@OUTDIR@', '-o', '@OUTPUT@', xsltproc_flags, '--xinclude', '@INPUT@'],
     build_by_default: false,
@@ -155,6 +170,7 @@ if xsltproc.found()
   install_html = custom_target('INSTALL.html',
     input: ['stylesheet-text.xsl', install_xml],
     output: 'INSTALL.html',
+    depfile: 'INSTALL.html.d',
     command: [xsltproc, '--path', '@OUTDIR@', '-o', '@OUTPUT@', xsltproc_flags, '@INPUT@'],
     build_by_default: false,
   )
@@ -177,11 +193,12 @@ endif
 #
 # Man pages
 #
-if xsltproc.found()
+if xsltproc_bin.found()
   # FIXME: implement / consider sqlmansectnum logic
   man = custom_target('man',
     input: ['stylesheet-man.xsl', postgres_full_xml],
     output: ['man1', 'man3', 'man7'],
+    depfile: 'man.d',
     depends: doc_generated,
     command: [xsltproc, '--path', '@OUTDIR@', '-o', '@OUTDIR@/', xsltproc_flags, '@INPUT@'],
     build_by_default: false,
@@ -195,17 +212,19 @@ endif
 #
 # Full documentation as PDF
 #
-if fop.found() and xsltproc.found()
+if fop.found() and xsltproc_bin.found()
   xsltproc_fo_flags = xsltproc_flags + ['--stringparam', 'img.src.path', meson.current_source_dir() + '/']
 
   foreach format, detail: {'A4': 'A4', 'US': 'USletter'}
     postgres_x_fo_f = 'postgres-@0@.fo'.format(format)
+    postgres_x_fo_dep = 'postgres-@0@.fo.d'.format(format)
     postgres_x_pdf_f = 'postgres-@0@.pdf'.format(format)
 
     postgres_x_fo = custom_target(postgres_x_fo_f,
       input: ['stylesheet-fo.xsl', postgres_full_xml],
-      output: [postgres_x_fo_f],
+      output: postgres_x_fo_f,
       depends: doc_generated,
+      depfile: postgres_x_fo_dep,
       command: [xsltproc, '--path', '@OUTDIR@/', xsltproc_fo_flags,
                 '--stringparam', 'paper.type', detail,
                 '-o', '@OUTPUT@', '@INPUT@'],
@@ -230,7 +249,7 @@ endif
 # This was previously implemented using dbtoepub - but that doesn't seem to
 # support running in build != source directory (i.e. VPATH builds already
 # weren't supported).
-if pandoc.found() and xsltproc.found()
+if pandoc.found() and xsltproc_bin.found()
   postgres_epub = custom_target('postgres.epub',
     input: postgres_full_xml,
     output: 'postgres.epub',
diff --git a/doc/src/sgml/xmltools_dep_wrapper b/doc/src/sgml/xmltools_dep_wrapper
new file mode 100644
index 00000000000..dd96f784268
--- /dev/null
+++ b/doc/src/sgml/xmltools_dep_wrapper
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+
+# A small wrapper around xmllint and xsltproc that runs the tool with
+# --load-trace and writes the loaded files to a depfile (in gcc's format).
+
+import argparse
+import re
+import subprocess
+import sys
+
+parser = argparse.ArgumentParser(
+    description='generate dependency file for docs')
+
+parser.add_argument('--targetname', type=str, required=False, nargs='+')
+parser.add_argument('--depfile', type=str, required=False)
+parser.add_argument('--tool', type=str, required=True)
+parser.add_argument('flags', nargs='*')
+
+args = parser.parse_args()
+
+if args.depfile:
+    command = [args.tool, '--load-trace'] + args.flags
+
+    # every depfile line starts with the targets that depend on a loaded file
+    line_start = ' '.join(args.targetname) + ': '
+
+    # --load-trace flag displays all the documents loaded during the processing
+    # to stderr
+    res = subprocess.run(command, stderr=subprocess.PIPE,
+                         universal_newlines=True)
+
+    line_re = re.compile(r'^Loaded URL="([^"]+)"')
+    with open(args.depfile, 'w') as f:
+        for line in res.stderr.splitlines():
+            m = line_re.match(line)
+
+            # not a load-trace line: pass it through so errors stay visible
+            if m is None:
+                print(line, file=sys.stderr)
+                continue
+            # Absolute paths are printed as file://, relative paths as-is. We
+            # don't care about http(s)://, as a) those will be printed even if
+            # resolved locally b) we couldn't have a dependency anyway.
+            fname = m.group(1)
+            if fname.startswith(('http://', 'https://')):
+                continue
+            if fname.startswith('file://'):
+                fname = fname[len('file://'):]
+            f.write(line_start + fname + '\n')
+else:
+    command = [args.tool] + args.flags
+    res = subprocess.run(command)
+
+sys.exit(res.returncode)
-- 
2.37.3.542.gdd3f6c4cae

