--- src/backend/parser/scan.l 2018-09-30 13:08:20.413221491 +0700 +++ src/interfaces/ecpg/preproc/pgc.l 2018-09-30 13:53:16.200036464 +0700 @@ -1,140 +1,99 @@ %top{ /*------------------------------------------------------------------------- * - * scan.l - * lexical scanner for PostgreSQL + * pgc.l + * lexical scanner for ecpg * - * NOTE NOTE NOTE: - * - * The rules in this file must be kept in sync with src/fe_utils/psqlscan.l! - * - * The rules are designed so that the scanner never has to backtrack, - * in the sense that there is always a rule that can match the input - * consumed so far (the rule action may internally throw back some input - * with yyless(), however). As explained in the flex manual, this makes - * for a useful speed increase --- about a third faster than a plain -CF - * lexer, in simple testing. The extra complexity is mostly in the rules - * for handling float numbers and continued string literals. If you change - * the lexical rules, verify that you haven't broken the no-backtrack - * property by running flex with the "-b" option and checking that the - * resulting "lex.backup" file says that no backing up is needed. (As of - * Postgres 9.2, this check is made automatically by the Makefile.) + * This is a modified version of src/backend/parser/scan.l * * * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * + * * IDENTIFICATION - * src/backend/parser/scan.l + * src/interfaces/ecpg/preproc/pgc.l * *------------------------------------------------------------------------- */ -#include "postgres.h" +#include "postgres_fe.h" #include -#include +#include #include "common/string.h" -#include "parser/gramparse.h" -#include "parser/parser.h" /* only needed for GUC variables */ -#include "parser/scansup.h" -#include "mb/pg_wchar.h" -} - -%{ - -/* LCOV_EXCL_START */ - -/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */ -#undef fprintf -#define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg) -static void -fprintf_to_ereport(const char *fmt, const char *msg) -{ - ereport(ERROR, (errmsg_internal("%s", msg))); +#include "extern.h" +#include "preproc.h" } -/* - * GUC variables. This is a DIRECT violation of the warning given at the - * head of gram.y, ie flex/bison code must not depend on any GUC variables; - * as such, changing their values can induce very unintuitive behavior. - * But we shall have to live with it until we can remove these variables. - */ -int backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING; -bool escape_string_warning = true; -bool standard_conforming_strings = true; - -/* - * Set the type of YYSTYPE. - */ -#define YYSTYPE core_YYSTYPE +%{ +extern YYSTYPE base_yylval; -/* - * Set the type of yyextra. All state variables used by the scanner should - * be in yyextra, *not* statically allocated. - */ -#define YY_EXTRA_TYPE core_yy_extra_type * +static int xcdepth = 0; /* depth of nesting in slash-star comments */ +static char *dolqstart = NULL; /* current $foo$ quote start string */ /* - * Each call to yylex must set yylloc to the location of the found token - * (expressed as a byte offset from the start of the input text). - * When we parse a token that requires multiple lexer rules to process, - * this should be done in the first such rule, else yylloc will point - * into the middle of the token. - */ -#define SET_YYLLOC() (*(yylloc) = yytext - yyextra->scanbuf) - -/* - * Advance yylloc by the given number of bytes. - */ -#define ADVANCE_YYLLOC(delta) ( *(yylloc) += (delta) ) - -#define startlit() ( yyextra->literallen = 0 ) -static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner); -static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner); -static char *litbufdup(core_yyscan_t yyscanner); -static char *litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner); -static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner); -static int process_integer_literal(const char *token, YYSTYPE *lval); -static bool is_utf16_surrogate_first(pg_wchar c); -static bool is_utf16_surrogate_second(pg_wchar c); -static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second); -static void addunicode(pg_wchar c, yyscan_t yyscanner); -static bool check_uescapechar(unsigned char escape); - -#define yyerror(msg) scanner_yyerror(msg, yyscanner) - -#define lexer_errposition() scanner_errposition(*(yylloc), yyscanner) - -static void check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner); -static void check_escape_warning(core_yyscan_t yyscanner); + * literalbuf is used to accumulate literal values when multiple rules + * are needed to parse a single literal. Call startlit to reset buffer + * to empty, addlit to add text. Note that the buffer is permanently + * malloc'd to the largest size needed so far in the current run. + */ +static char *literalbuf = NULL; /* expandable buffer */ +static int literallen; /* actual current length */ +static int literalalloc; /* current allocated buffer size */ + +/* Used for detecting global state together with braces_open */ +static int parenths_open; + +/* Used to tell parse_include() whether the command was #include or #include_next */ +static bool include_next; + +#define startlit() (literalbuf[0] = '\0', literallen = 0) +static void addlit(char *ytext, int yleng); +static void addlitchar (unsigned char); +static void parse_include (void); +static bool ecpg_isspace(char ch); +static bool isdefine(void); +static bool isinformixdefine(void); + +char *token_start; +static int state_before; + +struct _yy_buffer +{ + YY_BUFFER_STATE buffer; + long lineno; + char *filename; + struct _yy_buffer *next; +} *yy_buffer = NULL; + +static char *old; + +#define MAX_NESTED_IF 128 +static short preproc_tos; +static short ifcond; +static struct _if_value +{ + short condition; + short else_branch; +} stacked_if_value[MAX_NESTED_IF]; -/* - * Work around a bug in flex 2.5.35: it emits a couple of functions that - * it forgets to emit declarations for. Since we use -Wmissing-prototypes, - * this would cause warnings. Providing our own declarations should be - * harmless even when the bug gets fixed. - */ -extern int core_yyget_column(yyscan_t yyscanner); -extern void core_yyset_column(int column_no, yyscan_t yyscanner); +/* LCOV_EXCL_START */ %} -%option reentrant -%option bison-bridge -%option bison-locations %option 8bit %option never-interactive %option nodefault %option noinput -%option nounput %option noyywrap -%option noyyalloc -%option noyyrealloc -%option noyyfree %option warn -%option prefix="core_yy" +%option prefix="base_yy" + +%option yylineno + +%x C SQL incl def def_ident undef /* * OK, here is a short description of lex/flex rules behavior. @@ -148,41 +107,44 @@ * and to eliminate parsing troubles for numeric strings. * Exclusive states: * bit string literal - * extended C-style comments + * extended C-style comments in C + * extended C-style comments in SQL * delimited identifiers (double-quoted identifiers) + * * hexadecimal numeric string * standard quoted strings * extended quoted strings (support backslash escape sequences) + * national character quoted strings + * standard quoted strings in C * $foo$ quoted strings + * + * * quoted identifier with Unicode escapes - * end of a quoted identifier with Unicode escapes, UESCAPE can follow * quoted string with Unicode escapes - * end of a quoted string with Unicode escapes, UESCAPE can follow - * Unicode surrogate pair in extended quoted string - * - * Remember to add an <> case whenever you add a new exclusive state! - * The default one is probably not the right thing. */ %x xb -%x xc +%x xcc +%x xcsql %x xd +%x xdc %x xh %x xe +%x xn %x xq +%x xqc %x xdolq +%x xcond +%x xskip %x xui -%x xuiend %x xus -%x xusend -%x xeu /* * In order to make the world safe for Windows and Mac clients as well as * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n * sequence will be seen as two successive newlines, but that doesn't cause - * any problems. Comments that start with -- and extend to the next - * newline are treated as equivalent to a single whitespace character. + * any problems. SQL-style comments, which start with -- and extend to the + * next newline, are treated as equivalent to a single whitespace character. * * NOTE a fine point: if there is no newline following --, we will absorb * everything to the end of the input as a comment. This is correct. Older @@ -191,7 +153,7 @@ * * XXX perhaps \f (formfeed) should be treated as a newline as well? * - * XXX if you change the set of whitespace characters, fix scanner_isspace() + * XXX if you change the set of whitespace characters, fix ecpg_isspace() * to agree. */ @@ -211,32 +173,15 @@ * it, whereas {whitespace} should generally have a * after it... */ -special_whitespace ({space}+|{comment}{newline}) horiz_whitespace ({horiz_space}|{comment}) -whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*) +whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*) -/* - * To ensure that {quotecontinue} can be scanned without having to back up - * if the full pattern isn't matched, we include trailing whitespace in - * {quotestop}. This matches all cases where {quotecontinue} fails to match, - * except for {quote} followed by whitespace and just one "-" (not two, - * which would start a {comment}). To cover that we have {quotefail}. - * The actions for {quotestop} and {quotefail} must throw back characters - * beyond the quote proper. - */ quote ' quotestop {quote}{whitespace}* quotecontinue {quote}{whitespace_with_newline}{quote} quotefail {quote}{whitespace}*"-" /* Bit string - * It is tempting to scan the string for only those characters - * which are allowed. However, this leads to silently swallowed - * characters if illegal characters are included in the string. - * For example, if xbinside is [01] then B'ABCD' is interpreted - * as a zero-length string, and the ABCD' is lost! - * Better to pass the string forward and let the input routines - * validate the contents. */ xbstart [bB]{quote} xbinside [^']* @@ -255,13 +200,16 @@ xeoctesc [\\][0-7]{1,3} xehexesc [\\]x[0-9A-Fa-f]{1,2} xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8}) -xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7}) + +/* C version of hex number */ +xch 0[xX][0-9A-Fa-f]* /* Extended quote * xqdouble implements embedded quote, '''' */ xqstart {quote} xqdouble {quote}{quote} +xqcquote [\\]{quote} xqinside [^']+ /* $foo$ style quotes ("dollar quoting") @@ -289,23 +237,22 @@ xdinside [^"]+ /* Unicode escapes */ +/* (The ecpg scanner is not backup-free, so the fail rules in scan.l are not needed here, but could be added if desired.) */ uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote} -/* error rule to avoid backup */ -uescapefail [uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU] /* Quoted identifier with Unicode escapes */ xuistart [uU]&{dquote} +xuistop {dquote}({whitespace}*{uescape})? /* Quoted string with Unicode escapes */ xusstart [uU]&{quote} +xusstop {quote}({whitespace}*{uescape})? -/* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */ -xustop1 {uescapefail}? -xustop2 {uescape} - -/* error rule to avoid backup */ -xufailed [uU]& - +/* special stuff for C strings */ +xdcqq \\\\ +xdcqdq \\\" +xdcother [^"] +xdcinside ({xdcqq}|{xdcqdq}|{xdcother}) /* C-style comments * @@ -335,6 +282,8 @@ identifier {ident_start}{ident_cont}* +array ({ident_cont}|{whitespace}|[\[\]\+\-\*\%\/\(\)\>\.])* + /* Assorted special-case operators and operator-like tokens */ typecast "::" dot_dot \.\. @@ -372,23 +321,58 @@ * instead we pass it separately to parser. there it gets * coerced via doNegate() -- Leon aug 20 1999 * - * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10. - * * {realfail1} and {realfail2} are added to prevent the need for scanner * backup when the {real} rule fails to match completely. */ integer {digit}+ decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*)) -decimalfail {digit}+\.\. real ({integer}|{decimal})[Ee][-+]?{digit}+ realfail1 ({integer}|{decimal})[Ee] realfail2 ({integer}|{decimal})[Ee][-+] param \${integer} +/* special characters for other dbms */ +/* we have to react differently in compat mode */ +informix_special [\$] + other . +/* some stuff needed for ecpg */ +exec [eE][xX][eE][cC] +sql [sS][qQ][lL] +define [dD][eE][fF][iI][nN][eE] +include [iI][nN][cC][lL][uU][dD][eE] +include_next [iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT] +import [iI][mM][pP][oO][rR][tT] +undef [uU][nN][dD][eE][fF] + +ccomment "//".*\n + +if [iI][fF] +ifdef [iI][fF][dD][eE][fF] +ifndef [iI][fF][nN][dD][eE][fF] +else [eE][lL][sS][eE] +elif [eE][lL][iI][fF] +endif [eE][nN][dD][iI][fF] + +struct [sS][tT][rR][uU][cC][tT] + +exec_sql {exec}{space}*{sql}{space}* +ipdigit ({digit}|{digit}{digit}|{digit}{digit}{digit}) +ip {ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit} + +/* we might want to parse all cpp include files */ +cppinclude {space}*#{include}{space}* +cppinclude_next {space}*#{include_next}{space}* + +/* take care of cpp lines, they may also be continuated */ +/* first a general line for all commands not starting with "i" */ +/* and then the other commands starting with "i", we have to add these + * separately because the cppline production would match on "include" too */ +cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+\/)|.|\\{space}*{newline})*{newline} + /* * Dollar quoted strings are totally opaque, and no escaping is done on them. * Other quoted strings must allow some special characters such as single-quote @@ -404,309 +388,240 @@ %% +%{ + /* code to execute during start of each call of yylex() */ + token_start = NULL; +%} + +{ {whitespace} { /* ignore */ } - {xcstart} { - /* Set location in case of syntax error in comment */ - SET_YYLLOC(); - yyextra->xcdepth = 0; - BEGIN(xc); + token_start = yytext; + state_before = YYSTATE; + xcdepth = 0; + BEGIN(xcsql); /* Put back any characters past slash-star; see above */ yyless(2); + fputs("/*", yyout); } +} /* */ -{xcstart} { - (yyextra->xcdepth)++; +{xcstart} { + token_start = yytext; + state_before = YYSTATE; + xcdepth = 0; + BEGIN(xcc); /* Put back any characters past slash-star; see above */ yyless(2); + fputs("/*", yyout); } - -{xcstop} { - if (yyextra->xcdepth <= 0) - BEGIN(INITIAL); +{xcstart} { ECHO; } +{xcstart} { + xcdepth++; + /* Put back any characters past slash-star; see above */ + yyless(2); + fputs("/_*", yyout); + } +{xcstop} { + if (xcdepth <= 0) + { + ECHO; + BEGIN(state_before); + token_start = NULL; + } else - (yyextra->xcdepth)--; + { + xcdepth--; + fputs("*_/", yyout); + } + } +{xcstop} { + ECHO; + BEGIN(state_before); + token_start = NULL; } -{ +{ {xcinside} { - /* ignore */ + ECHO; } - {op_chars} { - /* ignore */ + ECHO; } - \*+ { - /* ignore */ + ECHO; } - <> { - yyerror("unterminated /* comment"); + mmfatal(PARSE_ERROR, "unterminated /* comment"); } -} /* */ +} /* */ +{ {xbstart} { - /* Binary bit type. - * At some point we should simply pass the string - * forward to the parser and label it there. - * In the meantime, place a leading "b" on the string - * to mark it for the input routine as a binary string. - */ - SET_YYLLOC(); + token_start = yytext; BEGIN(xb); startlit(); - addlitchar('b', yyscanner); + addlitchar('b'); } +} /* */ + {quotestop} | {quotefail} { yyless(1); - BEGIN(INITIAL); - yylval->str = litbufdup(yyscanner); + BEGIN(SQL); + if (literalbuf[strspn(literalbuf, "01") + 1] != '\0') + mmerror(PARSE_ERROR, ET_ERROR, "invalid bit string literal"); + base_yylval.str = mm_strdup(literalbuf); return BCONST; } + {xhinside} | {xbinside} { - addlit(yytext, yyleng, yyscanner); + addlit(yytext, yyleng); } {quotecontinue} | {quotecontinue} { /* ignore */ } -<> { yyerror("unterminated bit string literal"); } +<> { mmfatal(PARSE_ERROR, "unterminated bit string literal"); } -{xhstart} { - /* Hexadecimal bit type. - * At some point we should simply pass the string - * forward to the parser and label it there. - * In the meantime, place a leading "x" on the string - * to mark it for the input routine as a hex string. - */ - SET_YYLLOC(); +{xhstart} { + token_start = yytext; BEGIN(xh); startlit(); - addlitchar('x', yyscanner); + addlitchar('x'); } {quotestop} | {quotefail} { yyless(1); - BEGIN(INITIAL); - yylval->str = litbufdup(yyscanner); + BEGIN(SQL); + base_yylval.str = mm_strdup(literalbuf); return XCONST; } -<> { yyerror("unterminated hexadecimal string literal"); } +<> { mmfatal(PARSE_ERROR, "unterminated hexadecimal string literal"); } +{xqstart} { + token_start = yytext; + state_before = YYSTATE; + BEGIN(xqc); + startlit(); + } + +{ {xnstart} { /* National character. - * We will pass this along as a normal character string, - * but preceded with an internally-generated "NCHAR". + * Transfer it as-is to the backend. */ - const ScanKeyword *keyword; - - SET_YYLLOC(); - yyless(1); /* eat only 'n' this time */ - - keyword = ScanKeywordLookup("nchar", - yyextra->keywords, - yyextra->num_keywords); - if (keyword != NULL) - { - yylval->keyword = keyword->name; - return keyword->value; - } - else - { - /* If NCHAR isn't a keyword, just return "n" */ - yylval->str = pstrdup("n"); - return IDENT; - } + token_start = yytext; + state_before = YYSTATE; + BEGIN(xn); + startlit(); } - {xqstart} { - yyextra->warn_on_first_escape = true; - yyextra->saw_non_ascii = false; - SET_YYLLOC(); - if (yyextra->standard_conforming_strings) - BEGIN(xq); - else - BEGIN(xe); + token_start = yytext; + state_before = YYSTATE; + BEGIN(xq); startlit(); } {xestart} { - yyextra->warn_on_first_escape = false; - yyextra->saw_non_ascii = false; - SET_YYLLOC(); + token_start = yytext; + state_before = YYSTATE; BEGIN(xe); startlit(); } {xusstart} { - SET_YYLLOC(); - if (!yyextra->standard_conforming_strings) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("unsafe use of string constant with Unicode escapes"), - errdetail("String constants with Unicode escapes cannot be used when standard_conforming_strings is off."), - lexer_errposition())); + token_start = yytext; + state_before = YYSTATE; BEGIN(xus); startlit(); + addlit(yytext, yyleng); } -{quotestop} | -{quotefail} { +} /* */ + +{quotestop} | +{quotefail} { yyless(1); - BEGIN(INITIAL); - /* - * check that the data remains valid if it might have been - * made invalid by unescaping any chars. - */ - if (yyextra->saw_non_ascii) - pg_verifymbstr(yyextra->literalbuf, - yyextra->literallen, - false); - yylval->str = litbufdup(yyscanner); + BEGIN(state_before); + base_yylval.str = mm_strdup(literalbuf); return SCONST; } -{quotestop} | -{quotefail} { - /* throw back all but the quote */ +{quotestop} | +{quotefail} { yyless(1); - /* xusend state looks for possible UESCAPE */ - BEGIN(xusend); - } -{whitespace} { - /* stay in xusend state over whitespace */ - } -<> | -{other} | -{xustop1} { - /* no UESCAPE after the quote, throw back everything */ - yyless(0); - BEGIN(INITIAL); - yylval->str = litbuf_udeescape('\\', yyscanner); - return SCONST; - } -{xustop2} { - /* found UESCAPE after the end quote */ - BEGIN(INITIAL); - if (!check_uescapechar(yytext[yyleng - 2])) - { - SET_YYLLOC(); - ADVANCE_YYLLOC(yyleng - 2); - yyerror("invalid Unicode escape character"); - } - yylval->str = litbuf_udeescape(yytext[yyleng - 2], - yyscanner); - return SCONST; - } -{xqdouble} { - addlitchar('\'', yyscanner); - } -{xqinside} { - addlit(yytext, yyleng, yyscanner); - } -{xeinside} { - addlit(yytext, yyleng, yyscanner); + BEGIN(state_before); + base_yylval.str = mm_strdup(literalbuf); + return ECONST; } -{xeunicode} { - pg_wchar c = strtoul(yytext + 2, NULL, 16); - - check_escape_warning(yyscanner); - - if (is_utf16_surrogate_first(c)) - { - yyextra->utf16_first_part = c; - BEGIN(xeu); - } - else if (is_utf16_surrogate_second(c)) - yyerror("invalid Unicode surrogate pair"); - else - addunicode(c, yyscanner); - } -{xeunicode} { - pg_wchar c = strtoul(yytext + 2, NULL, 16); - - if (!is_utf16_surrogate_second(c)) - yyerror("invalid Unicode surrogate pair"); - - c = surrogate_pair_to_codepoint(yyextra->utf16_first_part, c); - - addunicode(c, yyscanner); - - BEGIN(xe); - } -. { yyerror("invalid Unicode surrogate pair"); } -\n { yyerror("invalid Unicode surrogate pair"); } -<> { yyerror("invalid Unicode surrogate pair"); } -{xeunicodefail} { - ereport(ERROR, - (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE), - errmsg("invalid Unicode escape"), - errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."), - lexer_errposition())); +{quotestop} | +{quotefail} { + yyless(1); + BEGIN(state_before); + base_yylval.str = mm_strdup(literalbuf); + return NCONST; + } +{xusstop} { + addlit(yytext, yyleng); + BEGIN(state_before); + base_yylval.str = mm_strdup(literalbuf); + return UCONST; + } +{xqdouble} { addlitchar('\''); } +{xqcquote} { + addlitchar('\\'); + addlitchar('\''); + } +{xqinside} { addlit(yytext, yyleng); } +{xeinside} { + addlit(yytext, yyleng); + } +{xeunicode} { + addlit(yytext, yyleng); + } +{xeescape} { + addlit(yytext, yyleng); } -{xeescape} { - if (yytext[1] == '\'') - { - if (yyextra->backslash_quote == BACKSLASH_QUOTE_OFF || - (yyextra->backslash_quote == BACKSLASH_QUOTE_SAFE_ENCODING && - PG_ENCODING_IS_CLIENT_ONLY(pg_get_client_encoding()))) - ereport(ERROR, - (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), - errmsg("unsafe use of \\' in a string literal"), - errhint("Use '' to write quotes in strings. \\' is insecure in client-only encodings."), - lexer_errposition())); - } - check_string_escape_warning(yytext[1], yyscanner); - addlitchar(unescape_single_char(yytext[1], yyscanner), - yyscanner); - } -{xeoctesc} { - unsigned char c = strtoul(yytext + 1, NULL, 8); - - check_escape_warning(yyscanner); - addlitchar(c, yyscanner); - if (c == '\0' || IS_HIGHBIT_SET(c)) - yyextra->saw_non_ascii = true; - } -{xehexesc} { - unsigned char c = strtoul(yytext + 2, NULL, 16); - - check_escape_warning(yyscanner); - addlitchar(c, yyscanner); - if (c == '\0' || IS_HIGHBIT_SET(c)) - yyextra->saw_non_ascii = true; +{xeoctesc} { + addlit(yytext, yyleng); } -{quotecontinue} { - /* ignore */ +{xehexesc} { + addlit(yytext, yyleng); } +{quotecontinue} { /* ignore */ } . { /* This is only needed for \ just before EOF */ - addlitchar(yytext[0], yyscanner); + addlitchar(yytext[0]); } -<> { yyerror("unterminated quoted string"); } +<> { mmfatal(PARSE_ERROR, "unterminated quoted string"); } -{dolqdelim} { - SET_YYLLOC(); - yyextra->dolqstart = pstrdup(yytext); - BEGIN(xdolq); - startlit(); - } +{ {dolqfailed} { - SET_YYLLOC(); /* throw back all but the initial "$" */ yyless(1); /* and treat it as {other} */ return yytext[0]; } +{dolqdelim} { + token_start = yytext; + if (dolqstart) + free(dolqstart); + dolqstart = mm_strdup(yytext); + BEGIN(xdolq); + startlit(); + addlit(yytext, yyleng); + } +} /* */ + {dolqdelim} { - if (strcmp(yytext, yyextra->dolqstart) == 0) + if (strcmp(yytext, dolqstart) == 0) { - pfree(yyextra->dolqstart); - yyextra->dolqstart = NULL; - BEGIN(INITIAL); - yylval->str = litbufdup(yyscanner); - return SCONST; + addlit(yytext, yyleng); + free(dolqstart); + dolqstart = NULL; + BEGIN(SQL); + base_yylval.str = mm_strdup(literalbuf); + return DOLCONST; } else { @@ -715,159 +630,126 @@ * the $... part to the output, but put back the final * $ for rescanning. Consider $delim$...$junk$delim$ */ - addlit(yytext, yyleng - 1, yyscanner); - yyless(yyleng - 1); + addlit(yytext, yyleng-1); + yyless(yyleng-1); } } {dolqinside} { - addlit(yytext, yyleng, yyscanner); + addlit(yytext, yyleng); } {dolqfailed} { - addlit(yytext, yyleng, yyscanner); + addlit(yytext, yyleng); } . { - /* This is only needed for $ inside the quoted text */ - addlitchar(yytext[0], yyscanner); + /* single quote or dollar sign */ + addlitchar(yytext[0]); } -<> { yyerror("unterminated dollar-quoted string"); } +<> { mmerror(PARSE_ERROR, ET_ERROR, "unterminated dollar-quoted string"); } +{ {xdstart} { - SET_YYLLOC(); + state_before = YYSTATE; BEGIN(xd); startlit(); } {xuistart} { - SET_YYLLOC(); + state_before = YYSTATE; BEGIN(xui); startlit(); + addlit(yytext, yyleng); } -{xdstop} { - char *ident; +} /* */ - BEGIN(INITIAL); - if (yyextra->literallen == 0) - yyerror("zero-length delimited identifier"); - ident = litbufdup(yyscanner); - if (yyextra->literallen >= NAMEDATALEN) - truncate_identifier(ident, yyextra->literallen, true); - yylval->str = ident; - return IDENT; - } -{dquote} { - yyless(1); - /* xuiend state looks for possible UESCAPE */ - BEGIN(xuiend); - } -{whitespace} { - /* stay in xuiend state over whitespace */ - } -<> | -{other} | -{xustop1} { - /* no UESCAPE after the quote, throw back everything */ - char *ident; - int identlen; - - yyless(0); - - BEGIN(INITIAL); - if (yyextra->literallen == 0) - yyerror("zero-length delimited identifier"); - ident = litbuf_udeescape('\\', yyscanner); - identlen = strlen(ident); - if (identlen >= NAMEDATALEN) - truncate_identifier(ident, identlen, true); - yylval->str = ident; - return IDENT; - } -{xustop2} { - /* found UESCAPE after the end quote */ - char *ident; - int identlen; - - BEGIN(INITIAL); - if (yyextra->literallen == 0) - yyerror("zero-length delimited identifier"); - if (!check_uescapechar(yytext[yyleng - 2])) - { - SET_YYLLOC(); - ADVANCE_YYLLOC(yyleng - 2); - yyerror("invalid Unicode escape character"); - } - ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner); - identlen = strlen(ident); - if (identlen >= NAMEDATALEN) - truncate_identifier(ident, identlen, true); - yylval->str = ident; - return IDENT; +{xdstop} { + BEGIN(state_before); + if (literallen == 0) + mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier"); + /* The backend will truncate the identifier here. We do not as it does not change the result. */ + base_yylval.str = mm_strdup(literalbuf); + return CSTRING; + } +{xdstop} { + BEGIN(state_before); + base_yylval.str = mm_strdup(literalbuf); + return CSTRING; + } +{xuistop} { + BEGIN(state_before); + if (literallen == 2) /* "U&" */ + mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier"); + /* The backend will truncate the identifier here. We do not as it does not change the result. */ + addlit(yytext, yyleng); + base_yylval.str = mm_strdup(literalbuf); + return UIDENT; } {xddouble} { - addlitchar('"', yyscanner); + addlitchar('"'); } {xdinside} { - addlit(yytext, yyleng, yyscanner); + addlit(yytext, yyleng); } -<> { yyerror("unterminated quoted identifier"); } - -{xufailed} { - char *ident; - - SET_YYLLOC(); - /* throw back all but the initial u/U */ - yyless(1); - /* and treat it as {identifier} */ - ident = downcase_truncate_identifier(yytext, yyleng, true); - yylval->str = ident; - return IDENT; +<> { mmfatal(PARSE_ERROR, "unterminated quoted identifier"); } +{xdstart} { + state_before = YYSTATE; + BEGIN(xdc); + startlit(); } +{xdcinside} { addlit(yytext, yyleng); } +{ {typecast} { - SET_YYLLOC(); return TYPECAST; } {dot_dot} { - SET_YYLLOC(); return DOT_DOT; } {colon_equals} { - SET_YYLLOC(); return COLON_EQUALS; } {equals_greater} { - SET_YYLLOC(); return EQUALS_GREATER; } {less_equals} { - SET_YYLLOC(); return LESS_EQUALS; } {greater_equals} { - SET_YYLLOC(); return GREATER_EQUALS; } {less_greater} { /* We accept both "<>" and "!=" as meaning NOT_EQUALS */ - SET_YYLLOC(); return NOT_EQUALS; } {not_equals} { /* We accept both "<>" and "!=" as meaning NOT_EQUALS */ - SET_YYLLOC(); return NOT_EQUALS; } +{informix_special} { + /* are we simulating Informix? */ + if (INFORMIX_MODE) + { + unput(':'); + } + else + return yytext[0]; + } {self} { - SET_YYLLOC(); + /* + * We may find a ';' inside a structure + * definition in a TYPE or VAR statement. + * This is not an EOL marker. + */ + if (yytext[0] == ';' && struct_level == 0) + BEGIN(C); return yytext[0]; } - {operator} { /* * Check for embedded slash-star or dash-dash; those @@ -927,8 +809,6 @@ } } - SET_YYLLOC(); - if (nchars < yyleng) { /* Strip the unwanted chars from the token */ @@ -963,628 +843,773 @@ } } - /* - * Complain if operator is too long. Unlike the case - * for identifiers, we make this an error not a notice- - * and-truncate, because the odds are we are looking at - * a syntactic mistake anyway. - */ - if (nchars >= NAMEDATALEN) - yyerror("operator too long"); - - yylval->str = pstrdup(yytext); + base_yylval.str = mm_strdup(yytext); return Op; } - {param} { - SET_YYLLOC(); - yylval->ival = atol(yytext + 1); + base_yylval.ival = atol(yytext+1); return PARAM; } +{ip} { + base_yylval.str = mm_strdup(yytext); + return IP; + } +} /* */ +{ {integer} { - SET_YYLLOC(); - return process_integer_literal(yytext, yylval); + int val; + char* endptr; + + errno = 0; + val = strtoint(yytext, &endptr, 10); + if (*endptr != '\0' || errno == ERANGE) + { + errno = 0; + base_yylval.str = mm_strdup(yytext); + return FCONST; + } + base_yylval.ival = val; + return ICONST; } {decimal} { - SET_YYLLOC(); - yylval->str = pstrdup(yytext); + base_yylval.str = mm_strdup(yytext); return FCONST; } -{decimalfail} { - /* throw back the .., and treat as integer */ - yyless(yyleng - 2); - SET_YYLLOC(); - return process_integer_literal(yytext, yylval); - } {real} { - SET_YYLLOC(); - yylval->str = pstrdup(yytext); - return FCONST; + base_yylval.str = mm_strdup(yytext); + return FCONST; } +} /* */ + +{ {realfail1} { - /* - * throw back the [Ee], and treat as {decimal}. Note - * that it is possible the input is actually {integer}, - * but since this case will almost certainly lead to a - * syntax error anyway, we don't bother to distinguish. - */ - yyless(yyleng - 1); - SET_YYLLOC(); - yylval->str = pstrdup(yytext); + yyless(yyleng-1); + base_yylval.str = mm_strdup(yytext); return FCONST; } {realfail2} { - /* throw back the [Ee][+-], and proceed as above */ - yyless(yyleng - 2); - SET_YYLLOC(); - yylval->str = pstrdup(yytext); + yyless(yyleng-2); + base_yylval.str = mm_strdup(yytext); return FCONST; } - - +:{identifier}((("->"|\.){identifier})|(\[{array}\]))* { + base_yylval.str = mm_strdup(yytext+1); + return CVARIABLE; + } {identifier} { - const ScanKeyword *keyword; - char *ident; + const ScanKeyword *keyword; - SET_YYLLOC(); - - /* Is it a keyword? */ - keyword = ScanKeywordLookup(yytext, - yyextra->keywords, - yyextra->num_keywords); - if (keyword != NULL) + if (!isdefine()) { - yylval->keyword = keyword->name; - return keyword->value; - } + /* Is it an SQL/ECPG keyword? */ + keyword = ScanECPGKeywordLookup(yytext); + if (keyword != NULL) + return keyword->value; + + /* Is it a C keyword? */ + keyword = ScanCKeywordLookup(yytext); + if (keyword != NULL) + return keyword->value; - /* - * No. Convert the identifier to lower case, and truncate - * if necessary. - */ - ident = downcase_truncate_identifier(yytext, yyleng, true); - yylval->str = ident; - return IDENT; + /* + * None of the above. Return it as an identifier. + * + * The backend will attempt to truncate and case-fold + * the identifier, but I see no good reason for ecpg + * to do so; that's just another way that ecpg could get + * out of step with the backend. + */ + base_yylval.str = mm_strdup(yytext); + return IDENT; + } } {other} { - SET_YYLLOC(); return yytext[0]; } +} /* */ -<> { - SET_YYLLOC(); - yyterminate(); - } +{exec_sql} { BEGIN(SQL); return SQL_START; } +{informix_special} { + /* are we simulating Informix? */ + if (INFORMIX_MODE) + { + BEGIN(SQL); + return SQL_START; + } + else + return S_ANYTHING; + } +{ccomment} { ECHO; } +{xch} { + char* endptr; + + errno = 0; + base_yylval.ival = strtoul((char *)yytext,&endptr,16); + if (*endptr != '\0' || errno == ERANGE) + { + errno = 0; + base_yylval.str = mm_strdup(yytext); + return SCONST; + } + return ICONST; + } +{cppinclude} { + if (system_includes) + { + include_next = false; + BEGIN(incl); + } + else + { + base_yylval.str = mm_strdup(yytext); + return CPP_LINE; + } + } +{cppinclude_next} { + if (system_includes) + { + include_next = true; + BEGIN(incl); + } + else + { + base_yylval.str = mm_strdup(yytext); + return CPP_LINE; + } + } +{cppline} { + base_yylval.str = mm_strdup(yytext); + return CPP_LINE; + } +{identifier} { + const ScanKeyword *keyword; -%% + /* + * Try to detect a function name: + * look for identifiers at the global scope + * keep the last identifier before the first '(' and '{' */ + if (braces_open == 0 && parenths_open == 0) + { + if (current_function) + free(current_function); + current_function = mm_strdup(yytext); + } + /* Informix uses SQL defines only in SQL space */ + /* however, some defines have to be taken care of for compatibility */ + if ((!INFORMIX_MODE || !isinformixdefine()) && !isdefine()) + { + keyword = ScanCKeywordLookup(yytext); + if (keyword != NULL) + return keyword->value; + else + { + base_yylval.str = mm_strdup(yytext); + return IDENT; + } + } + } +{xcstop} { mmerror(PARSE_ERROR, ET_ERROR, "nested /* ... */ comments"); } +":" { return ':'; } +";" { return ';'; } +"," { return ','; } +"*" { return '*'; } +"%" { return '%'; } +"/" { return '/'; } +"+" { return '+'; } +"-" { return '-'; } +"(" { parenths_open++; return '('; } +")" { parenths_open--; return ')'; } +{space} { ECHO; } +\{ { return '{'; } +\} { return '}'; } +\[ { return '['; } +\] { return ']'; } +\= { return '='; } +"->" { return S_MEMBER; } +">>" { return S_RSHIFT; } +"<<" { return S_LSHIFT; } +"||" { return S_OR; } +"&&" { return S_AND; } +"++" { return S_INC; } +"--" { return S_DEC; } +"==" { return S_EQUAL; } +"!=" { return S_NEQUAL; } +"+=" { return S_ADD; } +"-=" { return S_SUB; } +"*=" { return S_MUL; } +"/=" { return S_DIV; } +"%=" { return S_MOD; } +"->*" { return S_MEMPOINT; } +".*" { return S_DOTPOINT; } +{other} { return S_ANYTHING; } +{exec_sql}{define}{space}* { BEGIN(def_ident); } +{informix_special}{define}{space}* { + /* are we simulating Informix? */ + if (INFORMIX_MODE) + { + BEGIN(def_ident); + } + else + { + yyless(1); + return S_ANYTHING; + } + } +{exec_sql}{undef}{space}* { BEGIN(undef); } +{informix_special}{undef}{space}* { + /* are we simulating Informix? */ + if (INFORMIX_MODE) + { + BEGIN(undef); + } + else + { + yyless(1); + return S_ANYTHING; + } + } +{identifier}{space}*";" { + struct _defines *ptr, *ptr2 = NULL; + int i; -/* LCOV_EXCL_STOP */ + /* + * Skip the ";" and trailing whitespace. Note that yytext + * contains at least one non-space character plus the ";" + */ + for (i = strlen(yytext)-2; + i > 0 && ecpg_isspace(yytext[i]); + i-- ) + ; + yytext[i+1] = '\0'; -/* - * Arrange access to yyextra for subroutines of the main yylex() function. - * We expect each subroutine to have a yyscanner parameter. Rather than - * use the yyget_xxx functions, which might or might not get inlined by the - * compiler, we cheat just a bit and cast yyscanner to the right type. - */ -#undef yyextra -#define yyextra (((struct yyguts_t *) yyscanner)->yyextra_r) -/* Likewise for a couple of other things we need. */ -#undef yylloc -#define yylloc (((struct yyguts_t *) yyscanner)->yylloc_r) -#undef yyleng -#define yyleng (((struct yyguts_t *) yyscanner)->yyleng_r) + for (ptr = defines; ptr != NULL; ptr2 = ptr, ptr = ptr->next) + { + if (strcmp(yytext, ptr->old) == 0) + { + if (ptr2 == NULL) + defines = ptr->next; + else + ptr2->next = ptr->next; + free(ptr->new); + free(ptr->old); + free(ptr); + break; + } + } + BEGIN(C); + } +{other}|\n { + mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL UNDEF command"); + yyterminate(); + } +{exec_sql}{include}{space}* { BEGIN(incl); } +{informix_special}{include}{space}* { + /* are we simulating Informix? */ + if (INFORMIX_MODE) + { + BEGIN(incl); + } + else + { + yyless(1); + return S_ANYTHING; + } + } +{exec_sql}{ifdef}{space}* { ifcond = true; BEGIN(xcond); } +{informix_special}{ifdef}{space}* { + /* are we simulating Informix? */ + if (INFORMIX_MODE) + { + ifcond = true; + BEGIN(xcond); + } + else + { + yyless(1); + return S_ANYTHING; + } + } +{exec_sql}{ifndef}{space}* { ifcond = false; BEGIN(xcond); } +{informix_special}{ifndef}{space}* { + /* are we simulating Informix? */ + if (INFORMIX_MODE) + { + ifcond = false; + BEGIN(xcond); + } + else + { + yyless(1); + return S_ANYTHING; + } + } +{exec_sql}{elif}{space}* { /* pop stack */ + if ( preproc_tos == 0 ) { + mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\""); + } + else if ( stacked_if_value[preproc_tos].else_branch ) + mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\""); + else + preproc_tos--; -/* - * scanner_errposition - * Report a lexer or grammar error cursor position, if possible. - * - * This is expected to be used within an ereport() call. The return value - * is a dummy (always 0, in fact). - * - * Note that this can only be used for messages emitted during raw parsing - * (essentially, scan.l and gram.y), since it requires the yyscanner struct - * to still be available. - */ -int -scanner_errposition(int location, core_yyscan_t yyscanner) -{ - int pos; + ifcond = true; BEGIN(xcond); + } +{informix_special}{elif}{space}* { + /* are we simulating Informix? */ + if (INFORMIX_MODE) + { + if (preproc_tos == 0) + mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\""); + else if (stacked_if_value[preproc_tos].else_branch) + mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\""); + else + preproc_tos--; - if (location < 0) - return 0; /* no-op if location is unknown */ + ifcond = true; + BEGIN(xcond); + } + else + { + yyless(1); + return S_ANYTHING; + } + } - /* Convert byte offset to character number */ - pos = pg_mbstrlen_with_len(yyextra->scanbuf, location) + 1; - /* And pass it to the ereport mechanism */ - return errposition(pos); -} +{exec_sql}{else}{space}*";" { /* only exec sql endif pops the stack, so take care of duplicated 'else' */ + if (stacked_if_value[preproc_tos].else_branch) + mmfatal(PARSE_ERROR, "more than one EXEC SQL ELSE"); + else + { + stacked_if_value[preproc_tos].else_branch = true; + stacked_if_value[preproc_tos].condition = + (stacked_if_value[preproc_tos-1].condition && + !stacked_if_value[preproc_tos].condition); + + if (stacked_if_value[preproc_tos].condition) + BEGIN(C); + else + BEGIN(xskip); + } + } +{informix_special}{else}{space}*";" { + /* are we simulating Informix? */ + if (INFORMIX_MODE) + { + if (stacked_if_value[preproc_tos].else_branch) + mmfatal(PARSE_ERROR, "more than one EXEC SQL ELSE"); + else + { + stacked_if_value[preproc_tos].else_branch = true; + stacked_if_value[preproc_tos].condition = + (stacked_if_value[preproc_tos-1].condition && + !stacked_if_value[preproc_tos].condition); + + if (stacked_if_value[preproc_tos].condition) + BEGIN(C); + else + BEGIN(xskip); + } + } + else + { + yyless(1); + return S_ANYTHING; + } + } +{exec_sql}{endif}{space}*";" { + if (preproc_tos == 0) + mmfatal(PARSE_ERROR, "unmatched EXEC SQL ENDIF"); + else + preproc_tos--; -/* - * scanner_yyerror - * Report a lexer or grammar error. - * - * The message's cursor position is whatever YYLLOC was last set to, - * ie, the start of the current token if called within yylex(), or the - * most recently lexed token if called from the grammar. - * This is OK for syntax error messages from the Bison parser, because Bison - * parsers report error as soon as the first unparsable token is reached. - * Beware of using yyerror for other purposes, as the cursor position might - * be misleading! - */ -void -scanner_yyerror(const char *message, core_yyscan_t yyscanner) -{ - const char *loc = yyextra->scanbuf + *yylloc; + if (stacked_if_value[preproc_tos].condition) + BEGIN(C); + else + BEGIN(xskip); + } +{informix_special}{endif}{space}*";" { + /* are we simulating Informix? */ + if (INFORMIX_MODE) + { + if (preproc_tos == 0) + mmfatal(PARSE_ERROR, "unmatched EXEC SQL ENDIF"); + else + preproc_tos--; + + if (stacked_if_value[preproc_tos].condition) + BEGIN(C); + else + BEGIN(xskip); + } + else + { + yyless(1); + return S_ANYTHING; + } + } - if (*loc == YY_END_OF_BUFFER_CHAR) - { - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - /* translator: %s is typically the translation of "syntax error" */ - errmsg("%s at end of input", _(message)), - lexer_errposition())); - } - else - { - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - /* translator: first %s is typically the translation of "syntax error" */ - errmsg("%s at or near \"%s\"", _(message), loc), - lexer_errposition())); - } -} +{other} { /* ignore */ } +{identifier}{space}*";" { + if (preproc_tos >= MAX_NESTED_IF-1) + mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions"); + else + { + struct _defines *defptr; + unsigned int i; -/* - * Called before any actual parsing is done - */ -core_yyscan_t -scanner_init(const char *str, - core_yy_extra_type *yyext, - const ScanKeyword *keywords, - int num_keywords) -{ - Size slen = strlen(str); - yyscan_t scanner; + /* + * Skip the ";" and trailing whitespace. Note that yytext + * contains at least one non-space character plus the ";" + */ + for (i = strlen(yytext)-2; + i > 0 && ecpg_isspace(yytext[i]); + i-- ) + ; + yytext[i+1] = '\0'; + + for (defptr = defines; + defptr != NULL && strcmp(yytext, defptr->old) != 0; + defptr = defptr->next); + + preproc_tos++; + stacked_if_value[preproc_tos].else_branch = false; + stacked_if_value[preproc_tos].condition = + (defptr ? ifcond : !ifcond) && stacked_if_value[preproc_tos-1].condition; + } - if (yylex_init(&scanner) != 0) - elog(ERROR, "yylex_init() failed: %m"); + if (stacked_if_value[preproc_tos].condition) + BEGIN(C); + else + BEGIN(xskip); + } - core_yyset_extra(yyext, scanner); +{other}|\n { + mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL IFDEF command"); + yyterminate(); + } +{identifier} { + old = mm_strdup(yytext); + BEGIN(def); + startlit(); + } +{other}|\n { + mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL DEFINE command"); + yyterminate(); + } +{space}*";" { + struct _defines *ptr, *this; - yyext->keywords = keywords; - yyext->num_keywords = num_keywords; + for (ptr = defines; ptr != NULL; ptr = ptr->next) + { + if (strcmp(old, ptr->old) == 0) + { + free(ptr->new); + ptr->new = mm_strdup(literalbuf); + } + } + if (ptr == NULL) + { + this = (struct _defines *) mm_alloc(sizeof(struct _defines)); - yyext->backslash_quote = backslash_quote; - yyext->escape_string_warning = escape_string_warning; - yyext->standard_conforming_strings = standard_conforming_strings; + /* initial definition */ + this->old = old; + this->new = mm_strdup(literalbuf); + this->next = defines; + this->used = NULL; + defines = this; + } - /* - * Make a scan buffer with special termination needed by flex. - */ - yyext->scanbuf = (char *) palloc(slen + 2); - yyext->scanbuflen = slen; - memcpy(yyext->scanbuf, str, slen); - yyext->scanbuf[slen] = yyext->scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR; - yy_scan_buffer(yyext->scanbuf, slen + 2, scanner); + BEGIN(C); + } +[^;] { addlit(yytext, yyleng); } +\<[^\>]+\>{space}*";"? { parse_include(); } +{dquote}{xdinside}{dquote}{space}*";"? { parse_include(); } +[^;\<\>\"]+";" { parse_include(); } +{other}|\n { + mmfatal(PARSE_ERROR, "syntax error in EXEC SQL INCLUDE command"); + yyterminate(); + } - /* initialize literal buffer to a reasonable but expansible size */ - yyext->literalalloc = 1024; - yyext->literalbuf = (char *) palloc(yyext->literalalloc); - yyext->literallen = 0; +<> { + if (yy_buffer == NULL) + { + if ( preproc_tos > 0 ) + { + preproc_tos = 0; + mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\""); + } + yyterminate(); + } + else + { + struct _yy_buffer *yb = yy_buffer; + int i; + struct _defines *ptr; + + for (ptr = defines; ptr; ptr = ptr->next) + if (ptr->used == yy_buffer) + { + ptr->used = NULL; + break; + } - return scanner; -} + if (yyin != NULL) + fclose(yyin); + yy_delete_buffer( YY_CURRENT_BUFFER ); + yy_switch_to_buffer(yy_buffer->buffer); -/* - * Called after parsing is done to clean up after scanner_init() - */ -void -scanner_finish(core_yyscan_t yyscanner) -{ - /* - * We don't bother to call yylex_destroy(), because all it would do is - * pfree a small amount of control storage. It's cheaper to leak the - * storage until the parsing context is destroyed. The amount of space - * involved is usually negligible compared to the output parse tree - * anyway. - * - * We do bother to pfree the scanbuf and literal buffer, but only if they - * represent a nontrivial amount of space. The 8K cutoff is arbitrary. - */ - if (yyextra->scanbuflen >= 8192) - pfree(yyextra->scanbuf); - if (yyextra->literalalloc >= 8192) - pfree(yyextra->literalbuf); -} + yylineno = yy_buffer->lineno; + /* We have to output the filename only if we change files here */ + i = strcmp(input_filename, yy_buffer->filename); -static void -addlit(char *ytext, int yleng, core_yyscan_t yyscanner) -{ - /* enlarge buffer if needed */ - if ((yyextra->literallen + yleng) >= yyextra->literalalloc) - { - do - { - yyextra->literalalloc *= 2; - } while ((yyextra->literallen + yleng) >= yyextra->literalalloc); - yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf, - yyextra->literalalloc); - } - /* append new data */ - memcpy(yyextra->literalbuf + yyextra->literallen, ytext, yleng); - yyextra->literallen += yleng; -} + free(input_filename); + input_filename = yy_buffer->filename; + yy_buffer = yy_buffer->next; + free(yb); -static void -addlitchar(unsigned char ychar, core_yyscan_t yyscanner) -{ - /* enlarge buffer if needed */ - if ((yyextra->literallen + 1) >= yyextra->literalalloc) - { - yyextra->literalalloc *= 2; - yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf, - yyextra->literalalloc); - } - /* append new data */ - yyextra->literalbuf[yyextra->literallen] = ychar; - yyextra->literallen += 1; -} + if (i != 0) + output_line_number(); + } + } +{other}|\n { mmfatal(PARSE_ERROR, "internal error: unreachable state; please report this to "); } -/* - * Create a palloc'd copy of literalbuf, adding a trailing null. - */ -static char * -litbufdup(core_yyscan_t yyscanner) -{ - int llen = yyextra->literallen; - char *new; +%% - new = palloc(llen + 1); - memcpy(new, yyextra->literalbuf, llen); - new[llen] = '\0'; - return new; -} +/* LCOV_EXCL_STOP */ -static int -process_integer_literal(const char *token, YYSTYPE *lval) +void +lex_init(void) { - int val; - char *endptr; + braces_open = 0; + parenths_open = 0; + current_function = NULL; + + preproc_tos = 0; + yylineno = 1; + ifcond = true; + stacked_if_value[preproc_tos].condition = ifcond; + stacked_if_value[preproc_tos].else_branch = false; - errno = 0; - val = strtoint(token, &endptr, 10); - if (*endptr != '\0' || errno == ERANGE) + /* initialize literal buffer to a reasonable but expansible size */ + if (literalbuf == NULL) { - /* integer too large, treat it as a float */ - lval->str = pstrdup(token); - return FCONST; + literalalloc = 1024; + literalbuf = (char *) malloc(literalalloc); } - lval->ival = val; - return ICONST; -} + startlit(); -static unsigned int -hexval(unsigned char c) -{ - if (c >= '0' && c <= '9') - return c - '0'; - if (c >= 'a' && c <= 'f') - return c - 'a' + 0xA; - if (c >= 'A' && c <= 'F') - return c - 'A' + 0xA; - elog(ERROR, "invalid hexadecimal digit"); - return 0; /* not reached */ + BEGIN(C); } static void -check_unicode_value(pg_wchar c, char *loc, core_yyscan_t yyscanner) +addlit(char *ytext, int yleng) { - if (GetDatabaseEncoding() == PG_UTF8) - return; - - if (c > 0x7F) + /* enlarge buffer if needed */ + if ((literallen+yleng) >= literalalloc) { - ADVANCE_YYLLOC(loc - yyextra->literalbuf + 3); /* 3 for U&" */ - yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8"); + do + literalalloc *= 2; + while ((literallen+yleng) >= literalalloc); + literalbuf = (char *) realloc(literalbuf, literalalloc); } -} - -static bool -is_utf16_surrogate_first(pg_wchar c) -{ - return (c >= 0xD800 && c <= 0xDBFF); -} - -static bool -is_utf16_surrogate_second(pg_wchar c) -{ - return (c >= 0xDC00 && c <= 0xDFFF); -} - -static pg_wchar -surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second) -{ - return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF); + /* append new data, add trailing null */ + memcpy(literalbuf+literallen, ytext, yleng); + literallen += yleng; + literalbuf[literallen] = '\0'; } static void -addunicode(pg_wchar c, core_yyscan_t yyscanner) +addlitchar(unsigned char ychar) { - char buf[8]; - - if (c == 0 || c > 0x10FFFF) - yyerror("invalid Unicode escape value"); - if (c > 0x7F) + /* enlarge buffer if needed */ + if ((literallen+1) >= literalalloc) { - if (GetDatabaseEncoding() != PG_UTF8) - yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8"); - yyextra->saw_non_ascii = true; + literalalloc *= 2; + literalbuf = (char *) realloc(literalbuf, literalalloc); } - unicode_to_utf8(c, (unsigned char *) buf); - addlit(buf, pg_mblen(buf), yyscanner); + /* append new data, add trailing null */ + literalbuf[literallen] = ychar; + literallen += 1; + literalbuf[literallen] = '\0'; } -/* is 'escape' acceptable as Unicode escape character (UESCAPE syntax) ? */ -static bool -check_uescapechar(unsigned char escape) +static void +parse_include(void) { - if (isxdigit(escape) - || escape == '+' - || escape == '\'' - || escape == '"' - || scanner_isspace(escape)) - { - return false; - } - else - return true; -} + /* got the include file name */ + struct _yy_buffer *yb; + struct _include_path *ip; + char inc_file[MAXPGPATH]; + unsigned int i; + + yb = mm_alloc(sizeof(struct _yy_buffer)); + + yb->buffer = YY_CURRENT_BUFFER; + yb->lineno = yylineno; + yb->filename = input_filename; + yb->next = yy_buffer; -/* like litbufdup, but handle unicode escapes */ -static char * -litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner) -{ - char *new; - char *litbuf, - *in, - *out; - pg_wchar pair_first = 0; - - /* Make literalbuf null-terminated to simplify the scanning loop */ - litbuf = yyextra->literalbuf; - litbuf[yyextra->literallen] = '\0'; + yy_buffer = yb; /* - * This relies on the subtle assumption that a UTF-8 expansion cannot be - * longer than its escaped representation. + * skip the ";" if there is one and trailing whitespace. Note that + * yytext contains at least one non-space character plus the ";" */ - new = palloc(yyextra->literallen + 1); - - in = litbuf; - out = new; - while (*in) + for (i = strlen(yytext)-2; + i > 0 && ecpg_isspace(yytext[i]); + i--) + ; + + if (yytext[i] == ';') + i--; + + yytext[i+1] = '\0'; + + yyin = NULL; + + /* If file name is enclosed in '"' remove these and look only in '.' */ + /* Informix does look into all include paths though, except filename starts with '/' */ + if (yytext[0] == '"' && yytext[i] == '"' && + ((compat != ECPG_COMPAT_INFORMIX && compat != ECPG_COMPAT_INFORMIX_SE) || yytext[1] == '/')) { - if (in[0] == escape) + yytext[i] = '\0'; + memmove(yytext, yytext+1, strlen(yytext)); + + strlcpy(inc_file, yytext, sizeof(inc_file)); + yyin = fopen(inc_file, "r"); + if (!yyin) { - if (in[1] == escape) + if (strcmp(inc_file + strlen(inc_file) - 2, ".h") != 0) { - if (pair_first) - { - ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ - yyerror("invalid Unicode surrogate pair"); - } - *out++ = escape; - in += 2; + strcat(inc_file, ".h"); + yyin = fopen(inc_file, "r"); } - else if (isxdigit((unsigned char) in[1]) && - isxdigit((unsigned char) in[2]) && - isxdigit((unsigned char) in[3]) && - isxdigit((unsigned char) in[4])) - { - pg_wchar unicode; + } - unicode = (hexval(in[1]) << 12) + - (hexval(in[2]) << 8) + - (hexval(in[3]) << 4) + - hexval(in[4]); - check_unicode_value(unicode, in, yyscanner); - if (pair_first) - { - if (is_utf16_surrogate_second(unicode)) - { - unicode = surrogate_pair_to_codepoint(pair_first, unicode); - pair_first = 0; - } - else - { - ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ - yyerror("invalid Unicode surrogate pair"); - } - } - else if (is_utf16_surrogate_second(unicode)) - yyerror("invalid Unicode surrogate pair"); + } + else + { + if ((yytext[0] == '"' && yytext[i] == '"') || (yytext[0] == '<' && yytext[i] == '>')) + { + yytext[i] = '\0'; + memmove(yytext, yytext+1, strlen(yytext)); + } - if (is_utf16_surrogate_first(unicode)) - pair_first = unicode; - else - { - unicode_to_utf8(unicode, (unsigned char *) out); - out += pg_mblen(out); - } - in += 5; + for (ip = include_paths; yyin == NULL && ip != NULL; ip = ip->next) + { + if (strlen(ip->path) + strlen(yytext) + 3 > MAXPGPATH) + { + fprintf(stderr, _("Error: include path \"%s/%s\" is too long on line %d, skipping\n"), ip->path, yytext, yylineno); + continue; } - else if (in[1] == '+' && - isxdigit((unsigned char) in[2]) && - isxdigit((unsigned char) in[3]) && - isxdigit((unsigned char) in[4]) && - isxdigit((unsigned char) in[5]) && - isxdigit((unsigned char) in[6]) && - isxdigit((unsigned char) in[7])) + snprintf (inc_file, sizeof(inc_file), "%s/%s", ip->path, yytext); + yyin = fopen(inc_file, "r"); + if (!yyin) { - pg_wchar unicode; - - unicode = (hexval(in[2]) << 20) + - (hexval(in[3]) << 16) + - (hexval(in[4]) << 12) + - (hexval(in[5]) << 8) + - (hexval(in[6]) << 4) + - hexval(in[7]); - check_unicode_value(unicode, in, yyscanner); - if (pair_first) + if (strcmp(inc_file + strlen(inc_file) - 2, ".h") != 0) { - if (is_utf16_surrogate_second(unicode)) - { - unicode = surrogate_pair_to_codepoint(pair_first, unicode); - pair_first = 0; - } - else - { - ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ - yyerror("invalid Unicode surrogate pair"); - } + strcat(inc_file, ".h"); + yyin = fopen( inc_file, "r" ); } - else if (is_utf16_surrogate_second(unicode)) - yyerror("invalid Unicode surrogate pair"); - - if (is_utf16_surrogate_first(unicode)) - pair_first = unicode; - else - { - unicode_to_utf8(unicode, (unsigned char *) out); - out += pg_mblen(out); - } - in += 8; - } - else - { - ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ - yyerror("invalid Unicode escape value"); } - } - else - { - if (pair_first) + /* if the command was "include_next" we have to disregard the first hit */ + if (yyin && include_next) { - ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ - yyerror("invalid Unicode surrogate pair"); + fclose (yyin); + yyin = NULL; + include_next = false; } - *out++ = *in++; } } + if (!yyin) + mmfatal(NO_INCLUDE_FILE, "could not open include file \"%s\" on line %d", yytext, yylineno); - /* unfinished surrogate pair? */ - if (pair_first) - { - ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ - yyerror("invalid Unicode surrogate pair"); - } - - *out = '\0'; + input_filename = mm_strdup(inc_file); + yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE )); + yylineno = 1; + output_line_number(); - /* - * We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII - * codes; but it's probably not worth the trouble, since this isn't likely - * to be a performance-critical path. - */ - pg_verifymbstr(new, out - new, false); - return new; + BEGIN(C); } -static unsigned char -unescape_single_char(unsigned char c, core_yyscan_t yyscanner) +/* + * ecpg_isspace() --- return true if flex scanner considers char whitespace + */ +static bool +ecpg_isspace(char ch) { - switch (c) - { - case 'b': - return '\b'; - case 'f': - return '\f'; - case 'n': - return '\n'; - case 'r': - return '\r'; - case 't': - return '\t'; - default: - /* check for backslash followed by non-7-bit-ASCII */ - if (c == '\0' || IS_HIGHBIT_SET(c)) - yyextra->saw_non_ascii = true; - - return c; - } + if (ch == ' ' || + ch == '\t' || + ch == '\n' || + ch == '\r' || + ch == '\f') + return true; + return false; } -static void -check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner) +static bool isdefine(void) { - if (ychar == '\'') - { - if (yyextra->warn_on_first_escape && yyextra->escape_string_warning) - ereport(WARNING, - (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), - errmsg("nonstandard use of \\' in a string literal"), - errhint("Use '' to write quotes in strings, or use the escape string syntax (E'...')."), - lexer_errposition())); - yyextra->warn_on_first_escape = false; /* warn only once per string */ - } - else if (ychar == '\\') + struct _defines *ptr; + + /* is it a define? */ + for (ptr = defines; ptr; ptr = ptr->next) { - if (yyextra->warn_on_first_escape && yyextra->escape_string_warning) - ereport(WARNING, - (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), - errmsg("nonstandard use of \\\\ in a string literal"), - errhint("Use the escape string syntax for backslashes, e.g., E'\\\\'."), - lexer_errposition())); - yyextra->warn_on_first_escape = false; /* warn only once per string */ + if (strcmp(yytext, ptr->old) == 0 && ptr->used == NULL) + { + struct _yy_buffer *yb; + + yb = mm_alloc(sizeof(struct _yy_buffer)); + + yb->buffer = YY_CURRENT_BUFFER; + yb->lineno = yylineno; + yb->filename = mm_strdup(input_filename); + yb->next = yy_buffer; + + ptr->used = yy_buffer = yb; + + yy_scan_string(ptr->new); + return true; + } } - else - check_escape_warning(yyscanner); + + return false; } -static void -check_escape_warning(core_yyscan_t yyscanner) +static bool isinformixdefine(void) { - if (yyextra->warn_on_first_escape && yyextra->escape_string_warning) - ereport(WARNING, - (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), - errmsg("nonstandard use of escape in a string literal"), - errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."), - lexer_errposition())); - yyextra->warn_on_first_escape = false; /* warn only once per string */ -} + const char *new = NULL; -/* - * Interface functions to make flex use palloc() instead of malloc(). - * It'd be better to make these static, but flex insists otherwise. - */ + if (strcmp(yytext, "dec_t") == 0) + new = "decimal"; + else if (strcmp(yytext, "intrvl_t") == 0) + new = "interval"; + else if (strcmp(yytext, "dtime_t") == 0) + new = "timestamp"; -void * -core_yyalloc(yy_size_t bytes, core_yyscan_t yyscanner) -{ - return palloc(bytes); -} + if (new) + { + struct _yy_buffer *yb; -void * -core_yyrealloc(void *ptr, yy_size_t bytes, core_yyscan_t yyscanner) -{ - if (ptr) - return repalloc(ptr, bytes); - else - return palloc(bytes); -} + yb = mm_alloc(sizeof(struct _yy_buffer)); -void -core_yyfree(void *ptr, core_yyscan_t yyscanner) -{ - if (ptr) - pfree(ptr); + yb->buffer = YY_CURRENT_BUFFER; + yb->lineno = yylineno; + yb->filename = mm_strdup(input_filename); + yb->next = yy_buffer; + yy_buffer = yb; + + yy_scan_string(new); + return true; + } + + return false; }