diff --git a/src/tools/check_keywords.pl b/src/tools/check_keywords.pl index 8d0d962..a5a01d2 100755 --- a/src/tools/check_keywords.pl +++ b/src/tools/check_keywords.pl @@ -1,111 +1,109 @@ -#!/usr/bin/perl -w +#!/usr/bin/perl use strict; +use warnings; +use diagnostics; +use Carp; # Check that the keyword lists in gram.y and kwlist.h are sane. Run from # the top directory, or pass a path to a top directory as argument. # # $PostgreSQL$ -my $path; +local $, = ' '; # set output field separator +local $\ = "\n"; # set output record separator -if (@ARGV) { - $path = $ARGV[0]; - shift @ARGV; -} else { - $path = "."; -} - -$[ = 1; # set array base to 1 -$, = ' '; # set output field separator -$\ = "\n"; # set output record separator +my $path = $ARGV[0] || '.'; -my %keyword_categories; -$keyword_categories{'unreserved_keyword'} = 'UNRESERVED_KEYWORD'; -$keyword_categories{'col_name_keyword'} = 'COL_NAME_KEYWORD'; -$keyword_categories{'type_func_name_keyword'} = 'TYPE_FUNC_NAME_KEYWORD'; -$keyword_categories{'reserved_keyword'} = 'RESERVED_KEYWORD'; +my %keyword_categories = ( + unreserved_keyword => 'UNRESERVED_KEYWORD', + col_name_keyword => 'COL_NAME_KEYWORD', + type_func_name_keyword => 'TYPE_FUNC_NAME_KEYWORD', + reserved_keyword => 'RESERVED_KEYWORD', +); my $gram_filename = "$path/src/backend/parser/gram.y"; -open(GRAM, $gram_filename) || die("Could not open : $gram_filename"); -my ($S, $s, $k, $n, $kcat); +my ($S, $s, $k, $kcat); my $comment; -my @arr; my %keywords; -line: while () { - chomp; # strip record separator +open my $gram, '<', $gram_filename or croak "Could not open $gram_filename: $!"; +my @grammar = <$gram>; +close $gram; +line: foreach (@grammar) { + chomp; # strip record separator $S = $_; # Make sure any braces are split - $s = '{', $S =~ s/$s/ { /g; - $s = '}', $S =~ s/$s/ } /g; + $s = '{'; $S =~ s/$s/ { /xg; + $s = '}'; $S =~ s/$s/ } /xg; # Any comments are split - $s = '[/][*]', $S =~ s#$s# /* #g; - $s = '[*][/]', $S =~ s#$s# */ #g; + $s = '[/][*]'; $S =~ s#$s# /* #xg; + $s = '[*][/]'; $S =~ s#$s# */ #xg; if (!($kcat)) { - # Is this the beginning of a keyword list? - foreach $k (keys %keyword_categories) { - if ($S =~ m/^($k):/) { - $kcat = $k; - next line; - } - } - next line; + # Is this the beginning of a keyword list? + foreach my $k (keys %keyword_categories) { + if ($S =~ m/^($k):/x) { + $kcat = $k; + next line; + } + } + next line; } # Now split the line into individual fields - $n = (@arr = split(' ', $S)); + my @arr = split(' ', $S); + + my %comment_switch = ( + '*/' => 0, + '/*' => 1, + ); # Ok, we're in a keyword list. Go through each field in turn - for (my $fieldIndexer = 1; $fieldIndexer <= $n; $fieldIndexer++) { - if ($arr[$fieldIndexer] eq '*/' && $comment) { - $comment = 0; - next; - } - elsif ($comment) { - next; - } - elsif ($arr[$fieldIndexer] eq '/*') { - # start of a multiline comment - $comment = 1; - next; - } - elsif ($arr[$fieldIndexer] eq '//') { - next line; - } - - if ($arr[$fieldIndexer] eq ';') { - # end of keyword list - $kcat = ''; - next; - } - - if ($arr[$fieldIndexer] eq '|') { - next; - } - - # Put this keyword into the right list - push @{$keywords{$kcat}}, $arr[$fieldIndexer]; + for (0..$#arr) { + if ($arr[$_] eq '//') { + next line; + } + + if (exists $comment_switch{$arr[$_]}) { + $comment = $comment_switch{$arr[$_]}; + next; + } + + if ($comment) { + next; + } + + if ($arr[$_] eq ';') { + # end of keyword list + $kcat = ''; + next; + } + + if ($arr[$_] eq '|') { + next; + } + + # Put this keyword into the right list + push @{$keywords{$kcat}}, $arr[$_]; } } -close GRAM; # Check that all keywords are in alphabetical order -my ($prevkword, $kword, $bare_kword); -foreach $kcat (keys %keyword_categories) { +my ($prevkword, $bare_kword); +foreach my $kcat (keys %keyword_categories) { $prevkword = ''; - foreach $kword (@{$keywords{$kcat}}) { - # Some keyword have a _P suffix. Remove it for the comparison. - $bare_kword = $kword; - $bare_kword =~ s/_P$//; - if ($bare_kword le $prevkword) { - print "'$bare_kword' after '$prevkword' in $kcat list is misplaced"; - } - $prevkword = $bare_kword; + foreach my $kword (@{$keywords{$kcat}}) { + # Some keyword have a _P suffix. Remove it for the comparison. + $bare_kword = $kword; + $bare_kword =~ s/_P$//x; + if ($bare_kword le $prevkword) { + print "'$bare_kword' after '$prevkword' in $kcat list is misplaced"; + } + $prevkword = $bare_kword; } } @@ -115,7 +113,7 @@ foreach $kcat (keys %keyword_categories) { # with a dummy value. my %kwhashes; while ( my ($kcat, $kcat_id) = each(%keyword_categories) ) { - @arr = @{$keywords{$kcat}}; + my @arr = @{$keywords{$kcat}}; my $hash; foreach my $item (@arr) { $hash->{$item} = 1 } @@ -126,66 +124,69 @@ while ( my ($kcat, $kcat_id) = each(%keyword_categories) ) { # Now read in kwlist.h my $kwlist_filename = "$path/src/include/parser/kwlist.h"; -open(KWLIST, $kwlist_filename) || die("Could not open : $kwlist_filename"); my $prevkwstring = ''; my $bare_kwname; my %kwhash; -kwlist_line: while () { - my($line) = $_; - - if ($line =~ /^PG_KEYWORD\(\"(.*)\", (.*), (.*)\)/) - { - my($kwstring) = $1; - my($kwname) = $2; - my($kwcat_id) = $3; - - # Check that the list is in alphabetical order - if ($kwstring le $prevkwstring) { - print "'$kwstring' after '$prevkwstring' in kwlist.h is misplaced"; - } - $prevkwstring = $kwstring; - - # Check that the keyword string is valid: all lower-case ASCII chars - if ($kwstring !~ /^[a-z_]*$/) { - print "'$kwstring' is not a valid keyword string, must be all lower-case ASCII chars"; - } - - # Check that the keyword name is valid: all upper-case ASCII chars - if ($kwname !~ /^[A-Z_]*$/) { - print "'$kwname' is not a valid keyword name, must be all upper-case ASCII chars"; - } - - # Check that the keyword string matches keyword name - $bare_kwname = $kwname; - $bare_kwname =~ s/_P$//; - if ($bare_kwname ne uc($kwstring)) { - print "keyword name '$kwname' doesn't match keyword string '$kwstring'"; - } - - # Check that the keyword is present in the grammar - %kwhash = %{$kwhashes{$kwcat_id}}; - - if (!(%kwhash)) { - #print "Unknown kwcat_id: $kwcat_id"; - } else { - if (!($kwhash{$kwname})) { - print "'$kwname' not present in $kwcat_id section of gram.y"; - } else { - # Remove it from the hash, so that we can complain at the end - # if there's keywords left that were not found in kwlist.h - delete $kwhashes{$kwcat_id}->{$kwname}; - } - } + +open my $kwlist, '<', $kwlist_filename or croak "Could not open $kwlist_filename: $!"; +my @kwlist_lines = <$kwlist>; +close $kwlist; +kwlist_line: foreach (@kwlist_lines) { + my($kwstring, $kwname, $kwcat_id); + if (m{^PG_KEYWORD \( \"(.*)\", \s (.*), \s (.*) \) }x) { + ($kwstring, $kwname, $kwcat_id) = ($1, $2, $3); + } + else { + next kwlist_line; + } + + # Check that the list is in alphabetical order + if ($kwstring le $prevkwstring) { + print "'$kwstring' after '$prevkwstring' in kwlist.h is misplaced"; + } + $prevkwstring = $kwstring; + + # Check that the keyword string is valid: all lower-case ASCII chars + if ($kwstring !~ /^[a-z_]*$/x) { + print "'$kwstring' is not a valid keyword string, must be all lower-case ASCII chars"; + } + + # Check that the keyword name is valid: all upper-case ASCII chars + if ($kwname !~ /^[A-Z_]*$/x) { + print "'$kwname' is not a valid keyword name, must be all upper-case ASCII chars"; + } + + # Check that the keyword string matches keyword name + $bare_kwname = $kwname; + $bare_kwname =~ s/_P$//x; + if ($bare_kwname ne uc($kwstring)) { + print "keyword name '$kwname' doesn't match keyword string '$kwstring'"; + } + + # Check that the keyword is present in the grammar + %kwhash = %{$kwhashes{$kwcat_id}}; + + if (!(%kwhash)) { + #print "Unknown kwcat_id: $kwcat_id"; + } + else { + if (!($kwhash{$kwname})) { + print "'$kwname' not present in $kwcat_id section of gram.y"; + } + else { + # Remove it from the hash, so that we can complain at the end + # if there's keywords left that were not found in kwlist.h + delete $kwhashes{$kwcat_id}->{$kwname}; + } } } -close KWLIST; # Check that we've paired up all keywords from gram.y with lines in kwlist.h while ( my ($kwcat, $kwcat_id) = each(%keyword_categories) ) { %kwhash = %{$kwhashes{$kwcat_id}}; for my $kw ( keys %kwhash ) { - print "'$kw' found in gram.y $kwcat category, but not in kwlist.h" + print "'$kw' found in gram.y $kwcat category, but not in kwlist.h" } }