From 0137963134ac88b009a8e93d6a39cabfaefe43f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20V=C3=A9rit=C3=A9?= Date: Tue, 19 Dec 2023 11:49:35 +0100 Subject: [PATCH v2] Support backslash-dot on a line by itself as valid data in COPY FROM...CSV --- doc/src/sgml/ref/psql-ref.sgml | 4 ---- src/backend/commands/copyfromparse.c | 13 ++--------- src/bin/psql/copy.c | 32 ++++++++++++++++++++-------- src/test/regress/expected/copy.out | 18 ++++++++++++++++ src/test/regress/sql/copy.sql | 12 +++++++++++ 5 files changed, 55 insertions(+), 24 deletions(-) diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml index cc7d797159..d94e3cacfc 100644 --- a/doc/src/sgml/ref/psql-ref.sgml +++ b/doc/src/sgml/ref/psql-ref.sgml @@ -1119,10 +1119,6 @@ INSERT INTO tbl1 VALUES ($1, $2) \bind 'first value' 'second value' \g destination, because all data must pass through the client/server connection. For large amounts of data the SQL command might be preferable. - Also, because of this pass-through method, \copy - ... from in CSV mode will erroneously - treat a \. data value alone on a line as an - end-of-input marker. diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index f553734582..b4c291b25b 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -1137,7 +1137,6 @@ CopyReadLineText(CopyFromState cstate) bool result = false; /* CSV variables */ - bool first_char_in_line = true; bool in_quote = false, last_was_esc = false; char quotec = '\0'; @@ -1335,10 +1334,9 @@ CopyReadLineText(CopyFromState cstate) } /* - * In CSV mode, we only recognize \. alone on a line. This is because - * \. is a valid CSV data value. + * In CSV mode, \. is a valid CSV data value anywhere in the line. */ - if (c == '\\' && (!cstate->opts.csv_mode || first_char_in_line)) + if (c == '\\' && !cstate->opts.csv_mode) { char c2; @@ -1442,14 +1440,7 @@ CopyReadLineText(CopyFromState cstate) } } - /* - * This label is for CSV cases where \. appears at the start of a - * line, but there is more text after it, meaning it was a data value. - * We are more strict for \. in CSV mode because \. could be a data - * value, while in non-CSV mode, \. cannot be a data value. - */ not_end_of_copy: - first_char_in_line = false; } /* end of outer loop */ /* diff --git a/src/bin/psql/copy.c b/src/bin/psql/copy.c index dbbbdb8898..f31a7acbb6 100644 --- a/src/bin/psql/copy.c +++ b/src/bin/psql/copy.c @@ -620,20 +620,34 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res) /* current line is done? */ if (buf[buflen - 1] == '\n') { - /* check for EOF marker, but not on a partial line */ - if (at_line_begin) + /* + * When at the beginning of the line, check for EOF marker. + * If the marker is found and the data is inlined, + * we must stop at this point. If not, the \. line can be + * sent to the server, and we let it decide whether + * it's an EOF or not depending on the format: in + * basic TEXT, \. is going to be interpreted as an EOF, in + * CSV, it will not. + */ + if (at_line_begin && copystream == pset.cur_cmd_source) { - /* - * This code erroneously assumes '\.' on a line alone - * inside a quoted CSV string terminates the \copy. - * https://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org - * - * https://www.postgresql.org/message-id/bfcd57e4-8f23-4c3e-a5db-2571d09208e2@beta.fastmail.com - */ if ((linelen == 3 && memcmp(fgresult, "\\.\n", 3) == 0) || (linelen == 4 && memcmp(fgresult, "\\.\r\n", 4) == 0)) { copydone = true; + /* + * Remove the EOF marker from the data sent, unless + * using the obsolete v2 protocol that needs it. + * In the case of CSV, the EOF marker must be + * removed, otherwise it would be interpreted by + * the server as valid data. + */ + if (copystream == pset.cur_cmd_source && + PQprotocolVersion(conn) >= 3) + { + *fgresult='\0'; + buflen -= linelen; + } } } diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index b48365ec98..645e849bbd 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -32,6 +32,24 @@ select * from copytest except select * from copytest2; -------+------+-------- (0 rows) +--- test unquoted \. as data inside CSV +-- do not use copy out to export the data, as it would quote \. +\o :filename +\qecho line1 +\qecho '\\.' +\qecho line2 +\o +-- get the data back in with copy +truncate copytest2; +copy copytest2(test) from :'filename' csv; +select test from copytest2 order by test collate "C"; + test +------- + \. + line1 + line2 +(3 rows) + -- test header line feature create temp table copytest3 ( c1 int, diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index 43d2e906dd..68a58740cb 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -38,6 +38,18 @@ copy copytest2 from :'filename' csv quote '''' escape E'\\'; select * from copytest except select * from copytest2; +--- test unquoted \. as data inside CSV +-- do not use copy out to export the data, as it would quote \. +\o :filename +\qecho line1 +\qecho '\\.' +\qecho line2 +\o +-- get the data back in with copy +truncate copytest2; +copy copytest2(test) from :'filename' csv; +select test from copytest2 order by test collate "C"; + -- test header line feature -- 2.34.1