Index: doc/src/sgml/ref/copy.sgml =================================================================== RCS file: /cvsroot/pgsql-server/doc/src/sgml/ref/copy.sgml,v retrieving revision 1.55 diff -c -c -r1.55 copy.sgml *** doc/src/sgml/ref/copy.sgml 13 Dec 2003 23:59:07 -0000 1.55 --- doc/src/sgml/ref/copy.sgml 16 Apr 2004 04:01:47 -0000 *************** *** 26,32 **** [ BINARY ] [ OIDS ] [ DELIMITER [ AS ] 'delimiter' ] ! [ NULL [ AS ] 'null string' ] ] COPY tablename [ ( column [, ...] ) ] TO { 'filename' | STDOUT } --- 26,35 ---- [ BINARY ] [ OIDS ] [ DELIMITER [ AS ] 'delimiter' ] ! [ NULL [ AS ] 'null string' ] ! [ CSV [ QUOTE [ AS ] 'quote' ] ! [ ESCAPE [ AS ] 'escape' ] ! [ LITERAL column [, ...] ] COPY tablename [ ( column [, ...] ) ] TO { 'filename' | STDOUT } *************** *** 34,40 **** [ BINARY ] [ OIDS ] [ DELIMITER [ AS ] 'delimiter' ] ! [ NULL [ AS ] 'null string' ] ] --- 37,46 ---- [ BINARY ] [ OIDS ] [ DELIMITER [ AS ] 'delimiter' ] ! [ NULL [ AS ] 'null string' ] ! [ CSV [ QUOTE [ AS ] 'quote' ] ! [ ESCAPE [ AS ] 'escape' ] ! [ FORCE column [, ...] ] *************** *** 146,152 **** The single character that separates columns within each row ! (line) of the file. The default is a tab character. --- 152,159 ---- The single character that separates columns within each row ! (line) of the file. The default is a tab character in normal mode, ! a comma in CSV mode. *************** *** 156,175 **** The string that represents a null value. The default is ! \N (backslash-N). You might prefer an empty ! string, for example. ! On a COPY FROM, any data item that matches this string will be stored as a null value, so you should make sure that you use the same string as you used with COPY TO. --- 163,247 ---- The string that represents a null value. The default is ! \N (backslash-N) in normal mode, and a missing ! value (no quotes) in CSV mode. You might prefer an empty ! string in cases where you don't want to distinguish nulls from ! empty strings. ! When using COPY FROM, any data item that matches this string will be stored as a null value, so you should make sure that you use the same string as you used with COPY TO. + + + If you do not want anything used as null when using + COPY FROM, you can specify some value that is very + unlikely to appear in the file, such as frobnitz or + d5f4074b254c76cd8ae37bf1731f4aed (which is + md5('frobnitz')). This could be especially useful + when importing a CSV file into a table with NOT NULL + columns. + + + + + + + CSV + + + Enables Comma Separated Variable (CSV) mode. It sets the + default DELIMITER to comma, and QUOTE and + ESCAPE values to double-quote. + + + + quote string + + + Specifies the quotation character in CSV mode. + The default is double-quote. + + + + + + escape string + + + Specifies the character that should appear before a QUOTE + data character value in CSV mode. The default is double-quote. + + + + + + FORCE + + + In CSV COPY TO mode, forces quotes around each + column specified. + + + + + + LITERAL + + + In CSV COPY FROM mode, for each column specified, + do not do any null string comparison; load the value literally. + + + + *************** *** 234,239 **** --- 306,319 ---- + COPY input and output is affected by DateStyle + . For portability, DateStyle should be set + to ISO before using COPY. In CSV mode, + use ISO or a DateStyle appropriate for the + external application. + + + COPY stops operation at the first error. This should not lead to problems in the event of a COPY TO, but the target table will already have received *************** *** 253,259 **** When COPY is used without the BINARY option, ! the data read or written is a text file with one line per table row. Columns in a row are separated by the delimiter character. The column values themselves are strings generated by the output function, or acceptable to the input function, of each --- 333,340 ---- When COPY is used without the BINARY option, ! the data read or written is a text file with one line per table row, ! unless CSV mode is used. Columns in a row are separated by the delimiter character. The column values themselves are strings generated by the output function, or acceptable to the input function, of each *************** *** 377,382 **** --- 458,517 ---- meant as data, COPY FROM will complain if the line endings in the input are not all alike. + + + + CSV Format + + + This format is used for importing from and exporting to the Comma + Separated Variable (CSV) file format used by many other + programs, such as spreadsheets. Instead of the escaping used by + PostgreSQL's standard text mode, it + produces and recognises the common CSV escaping mechanism. + + + + The values in each record are separated by the DELIMITER + character. If the value contains the delimiter character, the + QUOTE character, a carriage return, or line feed character, + then the whole value is prefixed and suffixed by the QUOTE + character, and any occurrence within the value of a quote character or + the ESCAPE character is preceded by the escape character. + + + + The CSV format uses an unusual method for recording + NULL values. If a value is NULL, no + characters are output for the field. If the value is a zero-length + string, two QUOTE characters are output. On input, the + same logic is used. If a column is NOT NULL and a + missing values is encountered when reading a file, a warning is + issued and the value is treated as a zero-length string rather than + a NULL>/>. This is done to allow easier loading of foreign + CSV files. + + + + + CSV mode will both recognise and produce CSV files with quoted + values containing embedded carriage returns and line feeds. Thus + the files are not strictly one line per table row like non-CSV + files. + + + + + + Many programs produce strange and occasionally perverse CSV files, + so the file format is more a convention than a standard. Thus you + might encounter some files that cannot be imported using this + mechanism, and COPY might produce files that other + programs can not + process. + + + Index: doc/src/sgml/ref/psql-ref.sgml =================================================================== RCS file: /cvsroot/pgsql-server/doc/src/sgml/ref/psql-ref.sgml,v retrieving revision 1.110 diff -c -c -r1.110 psql-ref.sgml *** doc/src/sgml/ref/psql-ref.sgml 12 Apr 2004 15:58:52 -0000 1.110 --- doc/src/sgml/ref/psql-ref.sgml 16 Apr 2004 04:01:50 -0000 *************** *** 711,716 **** --- 711,720 ---- [ oids ] [ delimiter [as] 'character' ] [ null [as] 'string' ] + [ csv [ quote [as] 'string' ] + [ escape [as] 'string' ] + [ force column_list ] + [ literal column_list ] ] Index: src/backend/commands/copy.c =================================================================== RCS file: /cvsroot/pgsql-server/src/backend/commands/copy.c,v retrieving revision 1.220 diff -c -c -r1.220 copy.c *** src/backend/commands/copy.c 15 Apr 2004 22:36:03 -0000 1.220 --- src/backend/commands/copy.c 16 Apr 2004 04:01:54 -0000 *************** *** 70,76 **** typedef enum CopyReadResult { NORMAL_ATTR, ! END_OF_LINE } CopyReadResult; /* --- 70,77 ---- typedef enum CopyReadResult { NORMAL_ATTR, ! END_OF_LINE, ! UNTERMINATED_FIELD } CopyReadResult; /* *************** *** 130,144 **** /* non-export function prototypes */ static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids, ! char *delim, char *null_print); static void CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids, ! char *delim, char *null_print); static bool CopyReadLine(void); static char *CopyReadAttribute(const char *delim, const char *null_print, CopyReadResult *result, bool *isnull); static Datum CopyReadBinaryAttribute(int column_no, FmgrInfo *flinfo, Oid typelem, bool *isnull); static void CopyAttributeOut(char *string, char *delim); static List *CopyGetAttnums(Relation rel, List *attnamelist); static void limit_printout_length(StringInfo buf); --- 131,152 ---- /* non-export function prototypes */ static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids, ! char *delim, char *null_print, bool csv_mode, char *quote, char *escape, ! List *force_atts); static void CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids, ! char *delim, char *null_print, bool csv_mode, char *quote, char *escape, ! List *literal_atts); static bool CopyReadLine(void); static char *CopyReadAttribute(const char *delim, const char *null_print, CopyReadResult *result, bool *isnull); + static char *CopyReadAttributeCSV(const char *delim, const char *null_print, + char *quote, char *escape, + CopyReadResult *result, bool *isnull); static Datum CopyReadBinaryAttribute(int column_no, FmgrInfo *flinfo, Oid typelem, bool *isnull); static void CopyAttributeOut(char *string, char *delim); + static void CopyAttributeOutCSV(char *string, char *delim, char *quote, + char *escape, bool force_quote); static List *CopyGetAttnums(Relation rel, List *attnamelist); static void limit_printout_length(StringInfo buf); *************** *** 682,689 **** --- 690,704 ---- List *attnumlist; bool binary = false; bool oids = false; + bool csv_mode = false; char *delim = NULL; + char *quote = NULL; + char *escape = NULL; char *null_print = NULL; + List *force = NIL; + List *literal = NIL; + List *force_atts = NIL; + List *literal_atts = NIL; Relation rel; AclMode required_access = (is_from ? ACL_INSERT : ACL_SELECT); AclResult aclresult; *************** *** 725,730 **** --- 740,785 ---- errmsg("conflicting or redundant options"))); null_print = strVal(defel->arg); } + else if (strcmp(defel->defname, "csv") == 0) + { + if (csv_mode) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + csv_mode = intVal(defel->arg); + } + else if (strcmp(defel->defname, "quote") == 0) + { + if (quote) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + quote = strVal(defel->arg); + } + else if (strcmp(defel->defname, "escape") == 0) + { + if (escape) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + escape = strVal(defel->arg); + } + else if (strcmp(defel->defname, "force") == 0) + { + if (force) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + force = (List *)defel->arg; + } + else if (strcmp(defel->defname, "literal") == 0) + { + if (literal) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + literal = (List *)defel->arg; + } else elog(ERROR, "option \"%s\" not recognized", defel->defname); *************** *** 735,740 **** --- 790,800 ---- (errcode(ERRCODE_SYNTAX_ERROR), errmsg("cannot specify DELIMITER in BINARY mode"))); + if (binary && csv_mode) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify CSV in BINARY mode"))); + if (binary && null_print) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), *************** *** 742,751 **** /* Set defaults */ if (!delim) ! delim = "\t"; ! if (!null_print) ! null_print = "\\N"; /* * Open and lock the relation, using the appropriate lock type. --- 802,893 ---- /* Set defaults */ if (!delim) ! delim = csv_mode ? "," : "\t"; ! if (!null_print) ! null_print = csv_mode ? "" : "\\N"; ! ! if (csv_mode) ! { ! if (!quote) ! quote = "\""; ! if (!escape) ! escape = quote; ! } ! ! /* ! * Only single-character delimiter strings are supported. ! */ ! if (strlen(delim) != 1) ! ereport(ERROR, ! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ! errmsg("COPY delimiter must be a single character"))); ! ! /* ! * Check quote ! */ ! if (!csv_mode && quote != NULL) ! ereport(ERROR, ! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ! errmsg("COPY quote available only in CSV mode"))); ! ! if (csv_mode && strlen(quote) != 1) ! ereport(ERROR, ! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ! errmsg("COPY quote must be a single character"))); ! ! /* ! * Check escape ! */ ! if (!csv_mode && escape != NULL) ! ereport(ERROR, ! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ! errmsg("COPY escape available only in CSV mode"))); ! ! if (csv_mode && strlen(escape) != 1) ! ereport(ERROR, ! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ! errmsg("COPY escape must be a single character"))); ! ! /* ! * Check force ! */ ! if (!csv_mode && force != NIL) ! ereport(ERROR, ! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ! errmsg("COPY force available only in CSV mode"))); ! if (force != NIL && is_from) ! ereport(ERROR, ! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ! errmsg("COPY force only available using COPY TO"))); ! ! /* ! * Check literal ! */ ! if (!csv_mode && literal != NIL) ! ereport(ERROR, ! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ! errmsg("COPY literal available only in CSV mode"))); ! if (literal != NIL && !is_from) ! ereport(ERROR, ! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ! errmsg("COPY literal only available using COPY FROM"))); ! ! /* ! * Don't allow the delimiter to appear in the null string. ! */ ! if (strchr(null_print, delim[0]) != NULL) ! ereport(ERROR, ! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ! errmsg("COPY delimiter must not appear in the NULL specification"))); ! ! /* ! * Don't allow the csv quote char to appear in the null string. ! */ ! if (csv_mode && strchr(null_print, quote[0]) != NULL) ! ereport(ERROR, ! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ! errmsg("CSV quote character must not appear in the NULL specification"))); /* * Open and lock the relation, using the appropriate lock type. *************** *** 772,793 **** "psql's \\copy command also works for anyone."))); /* - * Presently, only single-character delimiter strings are supported. - */ - if (strlen(delim) != 1) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("COPY delimiter must be a single character"))); - - /* - * Don't allow the delimiter to appear in the null string. - */ - if (strchr(null_print, delim[0]) != NULL) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("COPY delimiter must not appear in the NULL specification"))); - - /* * Don't allow COPY w/ OIDs to or from a table without them */ if (oids && !rel->rd_rel->relhasoids) --- 914,919 ---- *************** *** 802,807 **** --- 928,979 ---- attnumlist = CopyGetAttnums(rel, attnamelist); /* + * Check that FORCE references valid COPY columns + */ + if (force) + { + TupleDesc tupDesc = RelationGetDescr(rel); + Form_pg_attribute *attr = tupDesc->attrs; + List *cur; + + force_atts = CopyGetAttnums(rel, force); + + foreach(cur, force_atts) + { + int attnum = lfirsti(cur); + + if (!intMember(attnum, attnumlist)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("FORCE column \"%s\" not referenced by COPY", + NameStr(attr[attnum - 1]->attname)))); + } + } + + /* + * Check that LITERAL references valid COPY columns + */ + if (literal) + { + List *cur; + TupleDesc tupDesc = RelationGetDescr(rel); + Form_pg_attribute *attr = tupDesc->attrs; + + literal_atts = CopyGetAttnums(rel, literal); + + foreach(cur, literal_atts) + { + int attnum = lfirsti(cur); + + if (!intMember(attnum, attnumlist)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("LITERAL column \"%s\" not referenced by COPY", + NameStr(attr[attnum - 1]->attname)))); + } + } + + /* * Set up variables to avoid per-attribute overhead. */ initStringInfo(&attribute_buf); *************** *** 864,870 **** errmsg("\"%s\" is a directory", filename))); } } ! CopyFrom(rel, attnumlist, binary, oids, delim, null_print); } else { /* copy from database to file */ --- 1036,1043 ---- errmsg("\"%s\" is a directory", filename))); } } ! CopyFrom(rel, attnumlist, binary, oids, delim, null_print, csv_mode, ! quote, escape, literal_atts); } else { /* copy from database to file */ *************** *** 926,932 **** errmsg("\"%s\" is a directory", filename))); } } ! CopyTo(rel, attnumlist, binary, oids, delim, null_print); } if (!pipe) --- 1099,1106 ---- errmsg("\"%s\" is a directory", filename))); } } ! CopyTo(rel, attnumlist, binary, oids, delim, null_print, csv_mode, ! quote, escape, force_atts); } if (!pipe) *************** *** 958,964 **** */ static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids, ! char *delim, char *null_print) { HeapTuple tuple; TupleDesc tupDesc; --- 1132,1139 ---- */ static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids, ! char *delim, char *null_print, bool csv_mode, char *quote, ! char *escape, List *force_atts) { HeapTuple tuple; TupleDesc tupDesc; *************** *** 967,972 **** --- 1142,1148 ---- int attr_count; Form_pg_attribute *attr; FmgrInfo *out_functions; + bool *force_quote; Oid *elements; bool *isvarlena; char *string; *************** *** 988,998 **** out_functions = (FmgrInfo *) palloc((num_phys_attrs + 1) * sizeof(FmgrInfo)); elements = (Oid *) palloc((num_phys_attrs + 1) * sizeof(Oid)); isvarlena = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool)); foreach(cur, attnumlist) { int attnum = lfirsti(cur); Oid out_func_oid; ! if (binary) getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid, &out_func_oid, &elements[attnum - 1], --- 1164,1175 ---- out_functions = (FmgrInfo *) palloc((num_phys_attrs + 1) * sizeof(FmgrInfo)); elements = (Oid *) palloc((num_phys_attrs + 1) * sizeof(Oid)); isvarlena = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool)); + force_quote = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool)); foreach(cur, attnumlist) { int attnum = lfirsti(cur); Oid out_func_oid; ! if (binary) getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid, &out_func_oid, &elements[attnum - 1], *************** *** 1002,1007 **** --- 1179,1188 ---- &out_func_oid, &elements[attnum - 1], &isvarlena[attnum - 1]); fmgr_info(out_func_oid, &out_functions[attnum - 1]); + if (intMember(attnum, force_atts)) + force_quote[attnum - 1] = true; + else + force_quote[attnum - 1] = false; } /* *************** *** 1051,1057 **** while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL) { bool need_delim = false; - CHECK_FOR_INTERRUPTS(); MemoryContextReset(mycontext); --- 1232,1237 ---- *************** *** 1113,1119 **** value, ObjectIdGetDatum(elements[attnum - 1]), Int32GetDatum(attr[attnum - 1]->atttypmod))); ! CopyAttributeOut(string, delim); } else { --- 1293,1307 ---- value, ObjectIdGetDatum(elements[attnum - 1]), Int32GetDatum(attr[attnum - 1]->atttypmod))); ! if (csv_mode) ! { ! CopyAttributeOutCSV(string, delim, quote, escape, ! (strcmp(string, null_print) == 0 || ! force_quote[attnum - 1])); ! } ! else ! CopyAttributeOut(string, delim); ! } else { *************** *** 1148,1153 **** --- 1336,1342 ---- pfree(out_functions); pfree(elements); pfree(isvarlena); + pfree(force_quote); } *************** *** 1243,1249 **** */ static void CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids, ! char *delim, char *null_print) { HeapTuple tuple; TupleDesc tupDesc; --- 1432,1439 ---- */ static void CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids, ! char *delim, char *null_print, bool csv_mode, char *quote, ! char *escape, List *literal_atts) { HeapTuple tuple; TupleDesc tupDesc; *************** *** 1256,1264 **** Oid *elements; Oid oid_in_element; ExprState **constraintexprs; bool hasConstraints = false; - int i; int attnum; List *cur; Oid in_func_oid; Datum *values; --- 1446,1455 ---- Oid *elements; Oid oid_in_element; ExprState **constraintexprs; + bool *literal_nullstr; bool hasConstraints = false; int attnum; + int i; List *cur; Oid in_func_oid; Datum *values; *************** *** 1317,1322 **** --- 1508,1514 ---- defmap = (int *) palloc((num_phys_attrs + 1) * sizeof(int)); defexprs = (ExprState **) palloc((num_phys_attrs + 1) * sizeof(ExprState *)); constraintexprs = (ExprState **) palloc0((num_phys_attrs + 1) * sizeof(ExprState *)); + literal_nullstr = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool)); for (attnum = 1; attnum <= num_phys_attrs; attnum++) { *************** *** 1333,1338 **** --- 1525,1535 ---- &in_func_oid, &elements[attnum - 1]); fmgr_info(in_func_oid, &in_functions[attnum - 1]); + if (intMember(attnum, literal_atts)) + literal_nullstr[attnum - 1] = true; + else + literal_nullstr[attnum - 1] = false; + /* Get default info if needed */ if (!intMember(attnum, attnumlist)) { *************** *** 1389,1397 **** ExecBSInsertTriggers(estate, resultRelInfo); if (!binary) - { file_has_oids = oids; /* must rely on user to tell us this... */ - } else { /* Read and verify binary header */ --- 1586,1592 ---- *************** *** 1500,1505 **** --- 1695,1701 ---- if (file_has_oids) { + /* can't be in CSV mode here */ string = CopyReadAttribute(delim, null_print, &result, &isnull); *************** *** 1538,1551 **** errmsg("missing data for column \"%s\"", NameStr(attr[m]->attname)))); ! string = CopyReadAttribute(delim, null_print, ! &result, &isnull); ! ! if (isnull) { ! /* we read an SQL NULL, no need to do anything */ } else { copy_attname = NameStr(attr[m]->attname); values[m] = FunctionCall3(&in_functions[m], --- 1734,1760 ---- errmsg("missing data for column \"%s\"", NameStr(attr[m]->attname)))); ! if (csv_mode) { ! string = CopyReadAttributeCSV(delim, null_print, quote, ! escape, &result, &isnull); ! if (result == UNTERMINATED_FIELD) ! ereport(ERROR, ! (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), ! errmsg("unterminated CSV quoted field"))); } else + string = CopyReadAttribute(delim, null_print, + &result, &isnull); + + if (csv_mode && isnull && literal_nullstr[m]) + { + string = null_print; /* set to NULL string */ + isnull = false; + } + + /* we read an SQL NULL, no need to do anything */ + if (!isnull) { copy_attname = NameStr(attr[m]->attname); values[m] = FunctionCall3(&in_functions[m], *************** *** 1732,1742 **** pfree(values); pfree(nulls); ! if (!binary) ! { ! pfree(in_functions); ! pfree(elements); ! } ExecDropTupleTable(tupleTable, true); --- 1941,1952 ---- pfree(values); pfree(nulls); ! pfree(in_functions); ! pfree(elements); ! pfree(defmap); ! pfree(defexprs); ! pfree(constraintexprs); ! pfree(literal_nullstr); ExecDropTupleTable(tupleTable, true); *************** *** 2070,2075 **** --- 2280,2431 ---- return attribute_buf.data; } + + /* + * Read the value of a single attribute in CSV mode, + * performing de-escaping as needed. Escaping does not follow the normal + * PostgreSQL text mode, but instead "standard" (i.e. common) CSV usage. + * + * Quoted fields can span lines, in which case the line end is embedded + * in the returned string. + * + * null_print is the null marker string. Note that this is compared to + * the pre-de-escaped input string (thus if it is quoted it is not a NULL). + * + * *result is set to indicate what terminated the read: + * NORMAL_ATTR: column delimiter + * END_OF_LINE: end of line + * UNTERMINATED_FIELD no quote detected at end of a quoted field + * + * In any case, the string read up to the terminator (or end of file) + * is returned. + * + * *isnull is set true or false depending on whether the input matched + * the null marker. Note that the caller cannot check this since the + * returned string will be the post-de-escaping equivalent, which may + * look the same as some valid data string. + *---------- + */ + + static char * + CopyReadAttributeCSV(const char *delim, const char *null_print, char *quote, + char *escape, CopyReadResult *result, bool *isnull) + { + char delimc = delim[0]; + char quotec = quote[0]; + char escapec = escape[0]; + char c; + int start_cursor = line_buf.cursor; + int end_cursor = start_cursor; + int input_len; + bool in_quote = false; + bool saw_quote = false; + + /* reset attribute_buf to empty */ + attribute_buf.len = 0; + attribute_buf.data[0] = '\0'; + + /* set default status */ + *result = END_OF_LINE; + + for (;;) + { + /* handle multiline quoted fields */ + if (in_quote && line_buf.cursor >= line_buf.len) + { + bool done; + + switch(eol_type) + { + case EOL_NL: + appendStringInfoString(&attribute_buf,"\n"); + break; + case EOL_CR: + appendStringInfoString(&attribute_buf,"\r"); + break; + case EOL_CRNL: + appendStringInfoString(&attribute_buf,"\r\n"); + break; + case EOL_UNKNOWN: + /* shouldn't happen - just keep going */ + break; + } + + copy_lineno++; + done = CopyReadLine(); + if (done && line_buf.len == 0) + break; + start_cursor = line_buf.cursor; + } + + end_cursor = line_buf.cursor; + if (line_buf.cursor >= line_buf.len) + break; + c = line_buf.data[line_buf.cursor++]; + /* + * unquoted field delimiter + */ + if (!in_quote && c == delimc) + { + *result = NORMAL_ATTR; + break; + } + /* + * start of quoted field (or part of field) + */ + if (!in_quote && c == quotec) + { + saw_quote = true; + in_quote = true; + continue; + } + /* + * escape within a quoted field + */ + if (in_quote && c == escapec) + { + /* + * peek at the next char if available, and escape it if it + * is an escape char or a quote char + */ + if (line_buf.cursor <= line_buf.len) + { + char nextc = line_buf.data[line_buf.cursor]; + if (nextc == escapec || nextc == quotec) + { + appendStringInfoCharMacro(&attribute_buf, nextc); + line_buf.cursor++; + continue; + } + } + } + /* + * end of quoted field. + * Must do this test after testing for escape in case quote char + * and escape char are the same (which is the common case). + */ + if (in_quote && c == quotec) + { + in_quote = false; + continue; + } + appendStringInfoCharMacro(&attribute_buf, c); + } + + if (in_quote) + *result = UNTERMINATED_FIELD; + + /* check whether raw input matched null marker */ + input_len = end_cursor - start_cursor; + if (!saw_quote && input_len == strlen(null_print) && + strncmp(&line_buf.data[start_cursor], null_print, input_len) == 0) + *isnull = true; + else + *isnull = false; + + return attribute_buf.data; + } + /* * Read a binary attribute */ *************** *** 2193,2198 **** --- 2549,2621 ---- break; } } + } + + /* + * Send CSV representation of one attribute, with conversion and + * CSV type escaping + */ + static void + CopyAttributeOutCSV(char *server_string, char *delim, char *quote, + char *escape, bool force_quote) + { + char *string; + char c; + char delimc = delim[0]; + char quotec = quote[0]; + char escapec = escape[0]; + bool need_quote = force_quote; + char *test_string; + bool same_encoding; + int mblen; + int i; + + same_encoding = (server_encoding == client_encoding); + if (!same_encoding) + string = (char *) pg_server_to_client((unsigned char *) server_string, + strlen(server_string)); + else + string = server_string; + + /* have to run through the string twice, + * first time to see if it needs quoting, second to actually send it + */ + + for(test_string = string; + !need_quote && (c = *test_string) != '\0'; + test_string += mblen) + { + if (c == delimc || c == quotec || c == '\n' || c == '\r') + need_quote = true; + if (!same_encoding) + mblen = pg_encoding_mblen(client_encoding, test_string); + else + mblen = 1; + } + + if (need_quote) + CopySendChar(quotec); + + for (; (c = *string) != '\0'; string += mblen) + { + if (c == quotec || c == escapec) + CopySendChar(escapec); + + CopySendChar(c); + + if (!same_encoding) + { + /* send additional bytes of the char, if any */ + mblen = pg_encoding_mblen(client_encoding, string); + for (i = 1; i < mblen; i++) + CopySendChar(string[i]); + } + else + mblen = 1; + } + + if (need_quote) + CopySendChar(quotec); } /* Index: src/backend/parser/gram.y =================================================================== RCS file: /cvsroot/pgsql-server/src/backend/parser/gram.y,v retrieving revision 2.450 diff -c -c -r2.450 gram.y *** src/backend/parser/gram.y 5 Apr 2004 03:07:26 -0000 2.450 --- src/backend/parser/gram.y 16 Apr 2004 04:02:02 -0000 *************** *** 343,349 **** CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE CLUSTER COALESCE COLLATE COLUMN COMMENT COMMIT COMMITTED CONSTRAINT CONSTRAINTS CONVERSION_P CONVERT COPY CREATE CREATEDB ! CREATEUSER CROSS CURRENT_DATE CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS --- 343,349 ---- CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE CLUSTER COALESCE COLLATE COLUMN COMMENT COMMIT COMMITTED CONSTRAINT CONSTRAINTS CONVERSION_P CONVERT COPY CREATE CREATEDB ! CREATEUSER CROSS CSV CURRENT_DATE CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS *************** *** 370,376 **** KEY LANCOMPILER LANGUAGE LARGE_P LAST_P LEADING LEFT LEVEL LIKE LIMIT ! LISTEN LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION LOCK_P MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE --- 370,376 ---- KEY LANCOMPILER LANGUAGE LARGE_P LAST_P LEADING LEFT LEVEL LIKE LIMIT ! LISTEN LITERAL LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION LOCK_P MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE *************** *** 386,391 **** --- 386,393 ---- PRECISION PRESERVE PREPARE PRIMARY PRIOR PRIVILEGES PROCEDURAL PROCEDURE + QUOTE + READ REAL RECHECK REFERENCES REINDEX RELATIVE_P RENAME REPEATABLE REPLACE RESET RESTART RESTRICT RETURNS REVOKE RIGHT ROLLBACK ROW ROWS RULE *************** *** 1360,1365 **** --- 1362,1387 ---- { $$ = makeDefElem("null", (Node *)makeString($3)); } + | CSV + { + $$ = makeDefElem("csv", (Node *)makeInteger(TRUE)); + } + | QUOTE opt_as Sconst + { + $$ = makeDefElem("quote", (Node *)makeString($3)); + } + | ESCAPE opt_as Sconst + { + $$ = makeDefElem("escape", (Node *)makeString($3)); + } + | FORCE columnList + { + $$ = makeDefElem("force", (Node *)$2); + } + | LITERAL columnList + { + $$ = makeDefElem("literal", (Node *)$2); + } ; /* The following exist for backward compatibility */ *************** *** 7420,7425 **** --- 7442,7448 ---- | COPY | CREATEDB | CREATEUSER + | CSV | CURSOR | CYCLE | DATABASE *************** *** 7473,7478 **** --- 7496,7502 ---- | LAST_P | LEVEL | LISTEN + | LITERAL | LOAD | LOCAL | LOCATION *************** *** 7507,7512 **** --- 7531,7537 ---- | PRIVILEGES | PROCEDURAL | PROCEDURE + | QUOTE | READ | RECHECK | REINDEX Index: src/backend/parser/keywords.c =================================================================== RCS file: /cvsroot/pgsql-server/src/backend/parser/keywords.c,v retrieving revision 1.147 diff -c -c -r1.147 keywords.c *** src/backend/parser/keywords.c 11 Mar 2004 01:47:40 -0000 1.147 --- src/backend/parser/keywords.c 16 Apr 2004 04:02:03 -0000 *************** *** 90,95 **** --- 90,96 ---- {"createdb", CREATEDB}, {"createuser", CREATEUSER}, {"cross", CROSS}, + {"csv", CSV}, {"current_date", CURRENT_DATE}, {"current_time", CURRENT_TIME}, {"current_timestamp", CURRENT_TIMESTAMP}, *************** *** 186,191 **** --- 187,193 ---- {"like", LIKE}, {"limit", LIMIT}, {"listen", LISTEN}, + {"literal", LITERAL}, {"load", LOAD}, {"local", LOCAL}, {"localtime", LOCALTIME}, *************** *** 248,253 **** --- 250,256 ---- {"privileges", PRIVILEGES}, {"procedural", PROCEDURAL}, {"procedure", PROCEDURE}, + {"quote", QUOTE}, {"read", READ}, {"real", REAL}, {"recheck", RECHECK}, Index: src/backend/tcop/fastpath.c =================================================================== RCS file: /cvsroot/pgsql-server/src/backend/tcop/fastpath.c,v retrieving revision 1.71 diff -c -c -r1.71 fastpath.c *** src/backend/tcop/fastpath.c 7 Jan 2004 18:56:27 -0000 1.71 --- src/backend/tcop/fastpath.c 16 Apr 2004 04:02:04 -0000 *************** *** 154,161 **** bool typisvarlena; char *outputstr; ! getTypeOutputInfo(rettype, ! &typoutput, &typelem, &typisvarlena); outputstr = DatumGetCString(OidFunctionCall3(typoutput, retval, ObjectIdGetDatum(typelem), --- 154,160 ---- bool typisvarlena; char *outputstr; ! getTypeOutputInfo(rettype, &typoutput, &typelem, &typisvarlena); outputstr = DatumGetCString(OidFunctionCall3(typoutput, retval, ObjectIdGetDatum(typelem), Index: src/bin/psql/copy.c =================================================================== RCS file: /cvsroot/pgsql-server/src/bin/psql/copy.c,v retrieving revision 1.43 diff -c -c -r1.43 copy.c *** src/bin/psql/copy.c 12 Apr 2004 15:58:52 -0000 1.43 --- src/bin/psql/copy.c 16 Apr 2004 04:02:06 -0000 *************** *** 66,73 **** --- 66,78 ---- bool from; bool binary; bool oids; + bool csv_mode; char *delim; char *null; + char *quote; + char *escape; + char *force_list; + char *literal_list; }; *************** *** 81,86 **** --- 86,95 ---- free(ptr->file); free(ptr->delim); free(ptr->null); + free(ptr->quote); + free(ptr->escape); + free(ptr->force_list); + free(ptr->literal_list); free(ptr); } *************** *** 277,282 **** --- 286,295 ---- { result->oids = true; } + else if (strcasecmp(token, "csv") == 0) + { + result->csv_mode = true; + } else if (strcasecmp(token, "delimiter") == 0) { token = strtokx(NULL, whitespace, NULL, "'", *************** *** 301,306 **** --- 314,383 ---- else goto error; } + else if (strcasecmp(token, "quote") == 0) + { + token = strtokx(NULL, whitespace, NULL, "'", + '\\', false, pset.encoding); + if (token && strcasecmp(token, "as") == 0) + token = strtokx(NULL, whitespace, NULL, "'", + '\\', false, pset.encoding); + if (token) + result->quote = pg_strdup(token); + else + goto error; + } + else if (strcasecmp(token, "escape") == 0) + { + token = strtokx(NULL, whitespace, NULL, "'", + '\\', false, pset.encoding); + if (token && strcasecmp(token, "as") == 0) + token = strtokx(NULL, whitespace, NULL, "'", + '\\', false, pset.encoding); + if (token) + result->escape = pg_strdup(token); + else + goto error; + } + else if (strcasecmp(token, "force") == 0) + { + /* handle parenthesized column list */ + for (;;) + { + token = strtokx(NULL, whitespace, ".,()", "\"", + 0, false, pset.encoding); + if (!token || strchr(".,()", token[0])) + goto error; + if (!result->force_list) + result->force_list = pg_strdup(token); + else + xstrcat(&result->force_list, token); + token = strtokx(NULL, whitespace, ".,()", "\"", + 0, false, pset.encoding); + if (!token || token[0] != ',') + break; + xstrcat(&result->force_list, token); + } + } + else if (strcasecmp(token, "literal") == 0) + { + /* handle parenthesized column list */ + for (;;) + { + token = strtokx(NULL, whitespace, ".,()", "\"", + 0, false, pset.encoding); + if (!token || strchr(".,()", token[0])) + goto error; + if (!result->literal_list) + result->literal_list = pg_strdup(token); + else + xstrcat(&result->literal_list, token); + token = strtokx(NULL, whitespace, ".,()", "\"", + 0, false, pset.encoding); + if (!token || token[0] != ',') + break; + xstrcat(&result->literal_list, token); + } + } else goto error; *************** *** 340,346 **** PGresult *result; bool success; struct stat st; ! /* parse options */ options = parse_slash_copy(args); --- 417,424 ---- PGresult *result; bool success; struct stat st; ! bool with_output = false; ! /* parse options */ options = parse_slash_copy(args); *************** *** 379,390 **** --- 457,512 ---- options->delim); } + /* There is no backward-compatible CSV syntax */ if (options->null) { if (options->null[0] == '\'') appendPQExpBuffer(&query, " WITH NULL AS %s", options->null); else appendPQExpBuffer(&query, " WITH NULL AS '%s'", options->null); + with_output = true; + } + + if (options->csv_mode) + { + appendPQExpBuffer(&query, " %sCSV ", with_output ? "" : "WITH "); + with_output = true; + } + + if (options->quote) + { + if (options->quote[0] == '\'') + appendPQExpBuffer(&query, " %sQUOTE AS %s", + with_output ? "" : "WITH ", options->quote); + else + appendPQExpBuffer(&query, " %sQUOTE AS '%s'", + with_output ? "" : "WITH ", options->quote); + with_output = true; + } + + if (options->escape) + { + if (options->escape[0] == '\'') + appendPQExpBuffer(&query, " %sESCAPE AS %s", + with_output ? "" : "WITH ", options->escape); + else + appendPQExpBuffer(&query, " %sESCAPE AS '%s'", + with_output ? "" : "WITH ", options->escape); + with_output = true; + } + + if (options->force_list) + { + appendPQExpBuffer(&query, " %sFORCE %s", + with_output ? "" : "WITH ", options->force_list); + with_output = true; + } + + if (options->literal_list) + { + appendPQExpBuffer(&query, " %sLITERAL %s", + with_output ? "" : "WITH ", options->literal_list); + with_output = true; } if (options->from)