From 2debe6cf7eec396e269a2c3d89ee56f5aea711e2 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 6 Feb 2018 21:46:46 -0500 Subject: [PATCH v2 1/2] Add user-callable SHA-2 functions Add the user-callable functions sha224, sha256, sha384, sha512. We already had these in the C code to support SCRAM, but there was no test coverage outside of the SCRAM tests. Adding these as user-callable functions allows writing some tests. Also, we have a user-callable md5 function but no more modern alternative, which led to wide use of md5 as a general-purpose hash function, which leads to occasional complaints about using md5. Also mark the existing md5 functions as leak-proof. --- doc/src/sgml/func.sgml | 71 ++++++++++++- src/backend/utils/adt/Makefile | 3 +- src/backend/utils/adt/cryptohashes.c | 170 +++++++++++++++++++++++++++++++ src/backend/utils/adt/varlena.c | 48 --------- src/include/catalog/pg_proc.h | 12 ++- src/test/regress/expected/opr_sanity.out | 6 ++ src/test/regress/expected/strings.out | 53 ++++++++++ src/test/regress/sql/strings.sql | 18 ++++ 8 files changed, 329 insertions(+), 52 deletions(-) create mode 100644 src/backend/utils/adt/cryptohashes.c diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 1e535cf215..2f59af25a6 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -3640,7 +3640,7 @@ Other Binary String Functions returning the result in hexadecimal md5(E'Th\\000omas'::bytea) - 8ab2d3c9689aaf18 b4958c334c82d8b1 + 8ab2d3c9689aaf18​b4958c334c82d8b1 @@ -3674,6 +3674,66 @@ Other Binary String Functions set_byte(E'Th\\000omas'::bytea, 4, 64) Th\000o@as + + + + + sha224 + + sha224(bytea) + + bytea + + SHA-224 hash + + sha224('abc') + \x23097d223405d8228642a477bda2​55b32aadbce4bda0b3f7e36c9da7 + + + + + + sha256 + + sha256(bytea) + + bytea + + SHA-256 hash + + sha256('abc') + \xba7816bf8f01cfea414140de5dae2223​b00361a396177a9cb410ff61f20015ad + + + + + + sha384 + + sha384(bytea) + + bytea + + SHA-384 hash + + sha384('abc') + \xcb00753f45a35e8bb5a03d699ac65007​272c32ab0eded1631a8b605a43ff5bed​8086072ba1e7cc2358baeca134c825a7 + + + + + + sha512 + + sha512(bytea) + + bytea + + SHA-512 hash + + sha512('abc') + \xddaf35a193617abacc417349ae204131​12e6fa4e89a97ea20a9eeee64b55d39a​2192992a274fc1a836ba3c23a3feebbd​454d4423643ce80e2a9ac94fa54ca49f + @@ -3686,6 +3746,15 @@ Other Binary String Functions the first byte, and bit 15 is the most significant bit of the second byte. + + Note that for historic reasons, the function md5 + returns a hex-encoded value of type text whereas the SHA-2 + functions return type bytea. Use the functions + encode and decode to convert + between the two, for example encode(sha256('abc'), + 'hex') to get a hex-encoded text representation. + + See also the aggregate function string_agg in and the large object functions diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 61ca90312f..4b35dbb8bb 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -11,7 +11,8 @@ include $(top_builddir)/src/Makefile.global # keep this list arranged alphabetically or it gets to be a mess OBJS = acl.o amutils.o arrayfuncs.o array_expanded.o array_selfuncs.o \ array_typanalyze.o array_userfuncs.o arrayutils.o ascii.o \ - bool.o cash.o char.o date.o datetime.o datum.o dbsize.o domains.o \ + bool.o cash.o char.o cryptohashes.o \ + date.o datetime.o datum.o dbsize.o domains.o \ encode.o enum.o expandeddatum.o expandedrecord.o \ float.o format_type.o formatting.o genfile.o \ geo_ops.o geo_selfuncs.o geo_spgist.o inet_cidr_ntop.o inet_net_pton.o \ diff --git a/src/backend/utils/adt/cryptohashes.c b/src/backend/utils/adt/cryptohashes.c new file mode 100644 index 0000000000..1f242eba71 --- /dev/null +++ b/src/backend/utils/adt/cryptohashes.c @@ -0,0 +1,170 @@ +/*------------------------------------------------------------------------- + * + * cryptohashes.c + * Cryptographic hash functions + * + * Portions Copyright (c) 2018, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/cryptohashes.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "common/md5.h" +#include "common/sha2.h" +#include "utils/builtins.h" + + +/* + * MD5 + */ + +/* MD5 produces a 16 byte (128 bit) hash; double it for hex */ +#define MD5_HASH_LEN 32 + +/* + * Create an MD5 hash of a text value and return it as hex string. + */ +Datum +md5_text(PG_FUNCTION_ARGS) +{ + text *in_text = PG_GETARG_TEXT_PP(0); + size_t len; + char hexsum[MD5_HASH_LEN + 1]; + + /* Calculate the length of the buffer using varlena metadata */ + len = VARSIZE_ANY_EXHDR(in_text); + + /* get the hash result */ + if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* convert to text and return it */ + PG_RETURN_TEXT_P(cstring_to_text(hexsum)); +} + +/* + * Create an MD5 hash of a bytea value and return it as a hex string. + */ +Datum +md5_bytea(PG_FUNCTION_ARGS) +{ + bytea *in = PG_GETARG_BYTEA_PP(0); + size_t len; + char hexsum[MD5_HASH_LEN + 1]; + + len = VARSIZE_ANY_EXHDR(in); + if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + PG_RETURN_TEXT_P(cstring_to_text(hexsum)); +} + + +/* + * SHA-2 variants + */ + +Datum +sha224_bytea(PG_FUNCTION_ARGS) +{ + bytea *in = PG_GETARG_BYTEA_PP(0); + const uint8 *data; + size_t len; + pg_sha224_ctx ctx; + unsigned char buf[PG_SHA224_DIGEST_LENGTH]; + bytea *result; + + len = VARSIZE_ANY_EXHDR(in); + data = (unsigned char *) VARDATA_ANY(in); + + pg_sha224_init(&ctx); + pg_sha224_update(&ctx, data, len); + pg_sha224_final(&ctx, buf); + + result = palloc(sizeof(buf) + VARHDRSZ); + SET_VARSIZE(result, sizeof(buf) + VARHDRSZ); + memcpy(VARDATA(result), buf, sizeof(buf)); + + PG_RETURN_BYTEA_P(result); +} + +Datum +sha256_bytea(PG_FUNCTION_ARGS) +{ + bytea *in = PG_GETARG_BYTEA_PP(0); + const uint8 *data; + size_t len; + pg_sha256_ctx ctx; + unsigned char buf[PG_SHA256_DIGEST_LENGTH]; + bytea *result; + + len = VARSIZE_ANY_EXHDR(in); + data = (unsigned char *) VARDATA_ANY(in); + + pg_sha256_init(&ctx); + pg_sha256_update(&ctx, data, len); + pg_sha256_final(&ctx, buf); + + result = palloc(sizeof(buf) + VARHDRSZ); + SET_VARSIZE(result, sizeof(buf) + VARHDRSZ); + memcpy(VARDATA(result), buf, sizeof(buf)); + + PG_RETURN_BYTEA_P(result); +} + +Datum +sha384_bytea(PG_FUNCTION_ARGS) +{ + bytea *in = PG_GETARG_BYTEA_PP(0); + const uint8 *data; + size_t len; + pg_sha384_ctx ctx; + unsigned char buf[PG_SHA384_DIGEST_LENGTH]; + bytea *result; + + len = VARSIZE_ANY_EXHDR(in); + data = (unsigned char *) VARDATA_ANY(in); + + pg_sha384_init(&ctx); + pg_sha384_update(&ctx, data, len); + pg_sha384_final(&ctx, buf); + + result = palloc(sizeof(buf) + VARHDRSZ); + SET_VARSIZE(result, sizeof(buf) + VARHDRSZ); + memcpy(VARDATA(result), buf, sizeof(buf)); + + PG_RETURN_BYTEA_P(result); +} + +Datum +sha512_bytea(PG_FUNCTION_ARGS) +{ + bytea *in = PG_GETARG_BYTEA_PP(0); + const uint8 *data; + size_t len; + pg_sha512_ctx ctx; + unsigned char buf[PG_SHA512_DIGEST_LENGTH]; + bytea *result; + + len = VARSIZE_ANY_EXHDR(in); + data = (unsigned char *) VARDATA_ANY(in); + + pg_sha512_init(&ctx); + pg_sha512_update(&ctx, data, len); + pg_sha512_final(&ctx, buf); + + result = palloc(sizeof(buf) + VARHDRSZ); + SET_VARSIZE(result, sizeof(buf) + VARHDRSZ); + memcpy(VARDATA(result), buf, sizeof(buf)); + + PG_RETURN_BYTEA_P(result); +} + diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 304cb26691..4346410d5a 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -22,7 +22,6 @@ #include "catalog/pg_collation.h" #include "catalog/pg_type.h" #include "common/int.h" -#include "common/md5.h" #include "lib/hyperloglog.h" #include "libpq/pqformat.h" #include "miscadmin.h" @@ -4564,53 +4563,6 @@ to_hex64(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(cstring_to_text(ptr)); } -/* - * Create an md5 hash of a text string and return it as hex - * - * md5 produces a 16 byte (128 bit) hash; double it for hex - */ -#define MD5_HASH_LEN 32 - -Datum -md5_text(PG_FUNCTION_ARGS) -{ - text *in_text = PG_GETARG_TEXT_PP(0); - size_t len; - char hexsum[MD5_HASH_LEN + 1]; - - /* Calculate the length of the buffer using varlena metadata */ - len = VARSIZE_ANY_EXHDR(in_text); - - /* get the hash result */ - if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - - /* convert to text and return it */ - PG_RETURN_TEXT_P(cstring_to_text(hexsum)); -} - -/* - * Create an md5 hash of a bytea field and return it as a hex string: - * 16-byte md5 digest is represented in 32 hex characters. - */ -Datum -md5_bytea(PG_FUNCTION_ARGS) -{ - bytea *in = PG_GETARG_BYTEA_PP(0); - size_t len; - char hexsum[MD5_HASH_LEN + 1]; - - len = VARSIZE_ANY_EXHDR(in); - if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - - PG_RETURN_TEXT_P(cstring_to_text(hexsum)); -} - /* * Return the size of a datum, possibly compressed * diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 2a5321315a..62e16514cc 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -3917,10 +3917,18 @@ DATA(insert OID = 3314 ( system PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 33 DESCR("SYSTEM tablesample method handler"); /* cryptographic */ -DATA(insert OID = 2311 ( md5 PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ md5_text _null_ _null_ _null_ )); +DATA(insert OID = 2311 ( md5 PGNSP PGUID 12 1 0 0 0 f f f t t f i s 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ md5_text _null_ _null_ _null_ )); DESCR("MD5 hash"); -DATA(insert OID = 2321 ( md5 PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "17" _null_ _null_ _null_ _null_ _null_ md5_bytea _null_ _null_ _null_ )); +DATA(insert OID = 2321 ( md5 PGNSP PGUID 12 1 0 0 0 f f f t t f i s 1 0 25 "17" _null_ _null_ _null_ _null_ _null_ md5_bytea _null_ _null_ _null_ )); DESCR("MD5 hash"); +DATA(insert OID = 3419 ( sha224 PGNSP PGUID 12 1 0 0 0 f f f t t f i s 1 0 17 "17" _null_ _null_ _null_ _null_ _null_ sha224_bytea _null_ _null_ _null_ )); +DESCR("SHA-224 hash"); +DATA(insert OID = 3420 ( sha256 PGNSP PGUID 12 1 0 0 0 f f f t t f i s 1 0 17 "17" _null_ _null_ _null_ _null_ _null_ sha256_bytea _null_ _null_ _null_ )); +DESCR("SHA-256 hash"); +DATA(insert OID = 3421 ( sha384 PGNSP PGUID 12 1 0 0 0 f f f t t f i s 1 0 17 "17" _null_ _null_ _null_ _null_ _null_ sha384_bytea _null_ _null_ _null_ )); +DESCR("SHA-384 hash"); +DATA(insert OID = 3422 ( sha512 PGNSP PGUID 12 1 0 0 0 f f f t t f i s 1 0 17 "17" _null_ _null_ _null_ _null_ _null_ sha512_bytea _null_ _null_ _null_ )); +DESCR("SHA-512 hash"); /* crosstype operations for date vs. timestamp and timestamptz */ DATA(insert OID = 2338 ( date_lt_timestamp PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "1082 1114" _null_ _null_ _null_ _null_ _null_ date_lt_timestamp _null_ _null_ _null_ )); diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 684f7f20a8..6616cc1bf0 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -699,6 +699,8 @@ timestamp_lt(timestamp without time zone,timestamp without time zone) timestamp_le(timestamp without time zone,timestamp without time zone) timestamp_ge(timestamp without time zone,timestamp without time zone) timestamp_gt(timestamp without time zone,timestamp without time zone) +md5(text) +md5(bytea) tidgt(tid,tid) tidlt(tid,tid) tidge(tid,tid) @@ -711,6 +713,10 @@ uuid_gt(uuid,uuid) uuid_ne(uuid,uuid) xidneq(xid,xid) xidneqint4(xid,integer) +sha224(bytea) +sha256(bytea) +sha384(bytea) +sha512(bytea) macaddr8_eq(macaddr8,macaddr8) macaddr8_lt(macaddr8,macaddr8) macaddr8_le(macaddr8,macaddr8) diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 8073eb4fad..cbe66c375c 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -1439,6 +1439,58 @@ select md5('12345678901234567890123456789012345678901234567890123456789012345678 t (1 row) +-- +-- SHA-2 +-- +SET bytea_output TO hex; +SELECT sha224(''); + sha224 +------------------------------------------------------------ + \xd14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f +(1 row) + +SELECT sha224('The quick brown fox jumps over the lazy dog.'); + sha224 +------------------------------------------------------------ + \x619cba8e8e05826e9b8c519c0a5c68f4fb653e8a3d8aa04bb2c8cd4c +(1 row) + +SELECT sha256(''); + sha256 +-------------------------------------------------------------------- + \xe3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 +(1 row) + +SELECT sha256('The quick brown fox jumps over the lazy dog.'); + sha256 +-------------------------------------------------------------------- + \xef537f25c895bfa782526529a9b63d97aa631564d5d789c2b765448c8635fb6c +(1 row) + +SELECT sha384(''); + sha384 +---------------------------------------------------------------------------------------------------- + \x38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b +(1 row) + +SELECT sha384('The quick brown fox jumps over the lazy dog.'); + sha384 +---------------------------------------------------------------------------------------------------- + \xed892481d8272ca6df370bf706e4d7bc1b5739fa2177aae6c50e946678718fc67a7af2819a021c2fc34e91bdb63409d7 +(1 row) + +SELECT sha512(''); + sha512 +------------------------------------------------------------------------------------------------------------------------------------ + \xcf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e +(1 row) + +SELECT sha512('The quick brown fox jumps over the lazy dog.'); + sha512 +------------------------------------------------------------------------------------------------------------------------------------ + \x91ea1245f20d46ae9a037a989f54f1f790f0a47607eeb8a14d12890cea77a1bbc6c7ed9cf205e67b7f2b8fd4c7dfd3a7a8617e45f3c463d481c7e586c39ac1ed +(1 row) + -- -- test behavior of escape_string_warning and standard_conforming_strings options -- @@ -1525,6 +1577,7 @@ select 'a\\bcd' as f1, 'a\\b\'cd' as f2, 'a\\b\'''cd' as f3, 'abcd\\' as f4, ' -- -- Additional string functions -- +SET bytea_output TO escape; SELECT initcap('hi THOMAS'); initcap ----------- diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 9ed242208f..5a82237870 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -506,6 +506,23 @@ CREATE TABLE toasttest (c char(4096)); select md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890'::bytea) = '57edf4a22be3c955ac49da2e2107b67a' AS "TRUE"; +-- +-- SHA-2 +-- +SET bytea_output TO hex; + +SELECT sha224(''); +SELECT sha224('The quick brown fox jumps over the lazy dog.'); + +SELECT sha256(''); +SELECT sha256('The quick brown fox jumps over the lazy dog.'); + +SELECT sha384(''); +SELECT sha384('The quick brown fox jumps over the lazy dog.'); + +SELECT sha512(''); +SELECT sha512('The quick brown fox jumps over the lazy dog.'); + -- -- test behavior of escape_string_warning and standard_conforming_strings options -- @@ -540,6 +557,7 @@ CREATE TABLE toasttest (c char(4096)); -- -- Additional string functions -- +SET bytea_output TO escape; SELECT initcap('hi THOMAS'); base-commit: 29d432e477a99f4c1e18820c5fc820a6b178c695 -- 2.16.2