From 27ab61775945d837e37ed6a0ce0c301697d183a1 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz
Date: Mon, 8 Sep 2025 17:16:05 +0300
Subject: [PATCH v1] Add sgml_syntax_check test to the Meson build

The 'sgml' check from the Makefile has been converted into a Perl script
(sgml_syntax_check) and integrated into meson.build.

Unlike Autoconf, Meson does not provide a way to mark tests as
non-default, so this script runs on every 'meson test'. While this
differs from the previous behavior, it is considered acceptable.
---
 doc/src/sgml/Makefile               |  16 +--
 doc/src/sgml/meson.build            |  23 +++++
 doc/src/sgml/t/sgml_syntax_check.pl | 145 ++++++++++++++++++++++++++++
 .cirrus.tasks.yml                   |   3 +
 4 files changed, 173 insertions(+), 14 deletions(-)
 create mode 100755 doc/src/sgml/t/sgml_syntax_check.pl

diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile
index 11aac913812..3256340a5b2 100644
--- a/doc/src/sgml/Makefile
+++ b/doc/src/sgml/Makefile
@@ -200,8 +200,8 @@ MAKEINFO = makeinfo
 ##
 
 # Quick syntax check without style processing
-check: postgres.sgml $(ALL_SGML) check-tabs check-nbsp
-	$(XMLLINT) $(XMLINCLUDE) --noout --valid $<
+check: postgres.sgml $(ALL_SGML)
+	$(PERL) $(srcdir)/t/sgml_syntax_check.pl --xmllint "$(XMLLINT)" --srcdir $(srcdir)
 
 
 ##
@@ -261,18 +261,6 @@ clean-man:
 
 endif # sqlmansectnum != 7
 
-# tabs are harmless, but it is best to avoid them in SGML files
-check-tabs:
-	@( ! grep '	' $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl) ) || \
-	(echo "Tabs appear in SGML/XML files" 1>&2; exit 1)
-
-# Non-breaking spaces are harmless, but it is best to avoid them in SGML files.
-# Use perl command because non-GNU grep or sed could not have hex escape sequence.
-check-nbsp:
-	@ ( $(PERL) -ne '/\xC2\xA0/ and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \
-	  $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \
-	(echo "Non-breaking spaces appear in SGML/XML files" 1>&2; exit 1)
-
 ##
 ## Clean
 ##
diff --git a/doc/src/sgml/meson.build b/doc/src/sgml/meson.build
index 6ae192eac68..89d8b01c944 100644
--- a/doc/src/sgml/meson.build
+++ b/doc/src/sgml/meson.build
@@ -306,3 +306,26 @@ endif
 if alldocs.length() != 0
   alias_target('alldocs', alldocs)
 endif
+
+sgml_syntax_check = files(
+  't/sgml_syntax_check.pl'
+)
+
+test(
+  'sgml_syntax_check',
+  perl,
+  protocol: 'exitcode',
+  suite: 'doc',
+  args: [
+    sgml_syntax_check,
+    '--xmllint',
+    '@0@ --nonet'.format(xmllint_bin.full_path()),
+    '--srcdir',
+    meson.current_source_dir(),
+    '--builddir',
+    meson.current_build_dir(),
+  ],
+  depends: doc_generated
+)
+
+testprep_targets += doc_generated
diff --git a/doc/src/sgml/t/sgml_syntax_check.pl b/doc/src/sgml/t/sgml_syntax_check.pl
new file mode 100755
index 00000000000..7ff1d9a7c26
--- /dev/null
+++ b/doc/src/sgml/t/sgml_syntax_check.pl
@@ -0,0 +1,145 @@
+#!/usr/bin/perl
+
+# doc/src/sgml/t/sgml_syntax_check.pl
+#
+# Quick syntax check of the documentation sources: validates postgres.sgml
+# with xmllint and rejects tabs and non-breaking spaces in SGML/XSL files.
+# Exits with status 0 only if every check passes.
+
+use strict;
+use warnings FATAL => 'all';
+use Getopt::Long;
+
+my $xmllint;
+my $srcdir;
+my $builddir;
+
+# "=s" makes the option value mandatory; ":s" would accept a bare "--srcdir"
+# and silently store an empty string.
+GetOptions(
+	'xmllint=s' => \$xmllint,
+	'srcdir=s' => \$srcdir,
+	'builddir=s' => \$builddir) or die "$0: wrong arguments\n";
+
+die "$0: --xmllint must be specified\n"
+  unless defined $xmllint && $xmllint =~ /\S/;
+die "$0: --srcdir must be specified\n" unless defined $srcdir;
+die "$0: source directory \"$srcdir\" does not exist\n" unless -d $srcdir;
+
+my $postgres_sgml = "postgres.sgml";
+
+# Return the plain files directly inside $dir whose names end in $suffix.
+# The listing is deliberately not recursive, matching the $(wildcard ...)
+# patterns of the Makefile rules this script replaces.  A directory that
+# cannot be opened yields an empty list.
+sub list_files
+{
+	my ($dir, $suffix) = @_;
+
+	opendir(my $dh, $dir) or return;
+	my @names = sort grep { /\Q$suffix\E$/ } readdir($dh);
+	closedir($dh);
+	return grep { -f $_ } map { $dir . '/' . $_ } @names;
+}
+
+# Files checked for both tabs and non-breaking spaces: *.sgml and *.xsl in
+# the source directory (and in the build directory, if given) plus
+# ref/*.sgml.  %seen keeps a file from being listed twice, for instance
+# when both directories are the same.
+my @files_to_process;
+{
+	my %seen;
+	my @dirs_to_search = ($srcdir);
+	push @dirs_to_search, $builddir if defined $builddir;
+
+	my @candidates;
+	for my $dir (@dirs_to_search)
+	{
+		push @candidates, list_files($dir, '.sgml'), list_files($dir, '.xsl');
+	}
+	push @candidates, list_files("$srcdir/ref", '.sgml');
+	@files_to_process = grep { !$seen{$_}++ } @candidates;
+}
+
+# Report each line of @files that matches $pattern on stderr, as
+# "file:line: contains $what", and return the number of such lines.
+sub report_matching_lines
+{
+	my ($pattern, $what, @files) = @_;
+
+	my $errors = 0;
+	for my $f (@files)
+	{
+		# Read raw bytes: both patterns are plain byte sequences in UTF-8,
+		# and not decoding keeps malformed input from raising an encoding
+		# warning, which would be fatal here.
+		open my $fh, "<:raw", $f or die "Can't open $f: $!\n";
+		while (my $line = <$fh>)
+		{
+			next unless $line =~ $pattern;
+			warn "$f:$.: contains $what\n";
+			$errors++;
+		}
+		close $fh;
+	}
+	return $errors;
+}
+
+# tabs are harmless, but it is best to avoid them in SGML files
+sub check_tabs
+{
+	my $errors = report_matching_lines(qr/\t/, 'tab', @files_to_process);
+	warn "Tabs appear in SGML/XML files\n" if $errors;
+	return $errors == 0;
+}
+
+# non-breaking spaces are harmless, but it is best to avoid them in SGML files
+sub check_nbsp
+{
+	# images/*.xsl is checked for non-breaking spaces only, as in the
+	# Makefile rule this replaces
+	my @files = (list_files("$srcdir/images", '.xsl'), @files_to_process);
+
+	my $errors =
+	  report_matching_lines(qr/\xC2\xA0/, 'non-breaking space', @files);
+	warn "Non-breaking spaces appear in SGML/XML files\n" if $errors;
+	return $errors == 0;
+}
+
+# Validate postgres.sgml with xmllint.  Returns true on success.
+sub run_xmllint
+{
+	# --xmllint may carry extra options (e.g. "xmllint --nonet"), so split
+	# it into words.  Running the command without a shell passes directory
+	# names containing spaces or shell metacharacters through intact.
+	my @cmd = (split(' ', $xmllint), '--path', '.', '--path', $srcdir);
+	push @cmd, '--path', $builddir if defined $builddir;
+	push @cmd, '--noout', '--valid', $postgres_sgml;
+
+	my $status;
+	{
+		# a failed exec is reported below instead of as a fatal warning
+		no warnings 'exec';
+		$status = system(@cmd);
+	}
+	if ($status == -1)
+	{
+		warn "could not run $cmd[0]: $!\n";
+		return 0;
+	}
+	if ($status != 0)
+	{
+		warn "xmllint validation failed\n";
+		return 0;
+	}
+	return 1;
+}
+
+# Run every check even if an earlier one fails, so that a single run
+# reports all problems.
+my $failed = 0;
+run_xmllint() or $failed = 1;
+check_tabs() or $failed = 1;
+check_nbsp() or $failed = 1;
+
+exit $failed;
diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml
index eca9d62fc22..1c937247a9a 100644
--- a/.cirrus.tasks.yml
+++ b/.cirrus.tasks.yml
@@ -627,6 +627,8 @@ task:
     TEST_JOBS: 8
 
     IMAGE: ghcr.io/cirruslabs/macos-runner:sonoma
+    XML_CATALOG_FILES: /opt/local/share/xml/docbook/4.5/catalog.xml
+
     CIRRUS_WORKING_DIR: ${HOME}/pgsql/
     CCACHE_DIR: ${HOME}/ccache
     MACPORTS_CACHE: ${HOME}/macports-cache
@@ -641,6 +643,7 @@ task:
 
     MACOS_PACKAGE_LIST: >-
       ccache
+      docbook-xml-4.5
       icu
       kerberos5
       lz4
-- 
2.51.0