Re: [HACKERS] [GENERAL] workaround for lack of REPLACE()

From: Bruce Momjian <pgman(at)candle(dot)pha(dot)pa(dot)us>
To: Joe Conway <mail(at)joeconway(dot)com>
Cc: pgsql-patches(at)postgresql(dot)org, Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>, Tatsuo Ishii <t-ishii(at)sra(dot)co(dot)jp>, lockhart(at)fourpalms(dot)org
Subject: Re: [HACKERS] [GENERAL] workaround for lack of REPLACE()
Date: 2002-08-22 03:23:51
Message-ID: 200208220323.g7M3Nqc00798@candle.pha.pa.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-general pgsql-hackers pgsql-patches


Patch applied. Thanks.

---------------------------------------------------------------------------

Joe Conway wrote:
> Joe Conway wrote:
> > I took Tom's advice and added wrapper functions around text_substr() and
> > bytea_substr() to cover the 2 argument case.
> >
> > I also added tests to strings.sql to cover substr() on toasted columns
> > of both text and bytea.
> >
>
> Please replace the original patch (substr.2002.08.14.1.patch) with the
> attached. It includes everything from the previous one, plus newly
> implemented builtin functions:
>
> replace(string, from, to)
> -- replaces all occurrences of "from" in "string" to "to"
> split(string, fldsep, column)
> -- splits "string" on "fldsep" and returns "column" number piece
> to_hex(int32_num) & to_hex(int64_num)
> -- takes integer number and returns as hex string
>
> All previously discussed on the list; see thread at:
> http://archives.postgresql.org/pgsql-hackers/2002-07/msg00511.php
>
> Examples:
>
> SELECT replace('yabadabadoo', 'ba', '123') AS "ya123da123doo";
> ya123da123doo
> ---------------
> ya123da123doo
> (1 row)
>
> select split('joeuser(at)mydatabase','@',1) AS "joeuser";
> joeuser
> ---------
> joeuser
> (1 row)
>
> select split('joeuser(at)mydatabase','@',2) AS "mydatabase";
> mydatabase
> ------------
> mydatabase
> (1 row)
>
> select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS
> "ffffffff";
> ffffffff
> ----------
> ffffffff
> (1 row)
>
> Tests have been added to the regression suite.
>
> Passes all regression tests. I've checked the strings.sql script in a
> multibyte database and it works fine also. I'd appreciate a good look by
> someone more familiar with multibyte related issues though.
>
> If it is OK, I'd like to hold off on docs until this is committed and
> after beta starts.
>
> If there are no objections, please apply.
>
> Thanks,
>
> Joe

> Index: src/backend/utils/adt/varlena.c
> ===================================================================
> RCS file: /opt/src/cvs/pgsql-server/src/backend/utils/adt/varlena.c,v
> retrieving revision 1.87
> diff -c -r1.87 varlena.c
> *** src/backend/utils/adt/varlena.c 4 Aug 2002 06:44:47 -0000 1.87
> --- src/backend/utils/adt/varlena.c 16 Aug 2002 19:54:03 -0000
> ***************
> *** 18,23 ****
> --- 18,25 ----
>
> #include "mb/pg_wchar.h"
> #include "miscadmin.h"
> + #include "access/tuptoaster.h"
> + #include "lib/stringinfo.h"
> #include "utils/builtins.h"
> #include "utils/pg_locale.h"
>
> ***************
> *** 27,34 ****
> --- 29,62 ----
> #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
> #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
> #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
> + #define PG_TEXTARG_GET_STR(arg_) \
> + DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
> + #define PG_TEXT_GET_STR(textp_) \
> + DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
> + #define PG_STR_GET_TEXT(str_) \
> + DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
> + #define TEXTLEN(textp) \
> + text_length(PointerGetDatum(textp))
> + #define TEXTPOS(buf_text, from_sub_text) \
> + text_position(PointerGetDatum(buf_text), PointerGetDatum(from_sub_text), 1)
> + #define TEXTDUP(textp) \
> + DatumGetTextPCopy(PointerGetDatum(textp))
> + #define LEFT(buf_text, from_sub_text) \
> + text_substring(PointerGetDatum(buf_text), \
> + 1, \
> + TEXTPOS(buf_text, from_sub_text) - 1, false)
> + #define RIGHT(buf_text, from_sub_text, from_sub_text_len) \
> + text_substring(PointerGetDatum(buf_text), \
> + TEXTPOS(buf_text, from_sub_text) + from_sub_text_len, \
> + -1, true)
>
> static int text_cmp(text *arg1, text *arg2);
> + static int32 text_length(Datum str);
> + static int32 text_position(Datum str, Datum search_str, int matchnum);
> + static text *text_substring(Datum str,
> + int32 start,
> + int32 length,
> + bool length_not_specified);
>
>
> /*****************************************************************************
> ***************
> *** 285,303 ****
> Datum
> textlen(PG_FUNCTION_ARGS)
> {
> ! text *t = PG_GETARG_TEXT_P(0);
>
> ! #ifdef MULTIBYTE
> ! /* optimization for single byte encoding */
> ! if (pg_database_encoding_max_length() <= 1)
> ! PG_RETURN_INT32(VARSIZE(t) - VARHDRSZ);
> !
> ! PG_RETURN_INT32(
> ! pg_mbstrlen_with_len(VARDATA(t), VARSIZE(t) - VARHDRSZ)
> ! );
> ! #else
> ! PG_RETURN_INT32(VARSIZE(t) - VARHDRSZ);
> ! #endif
> }
>
> /*
> --- 313,348 ----
> Datum
> textlen(PG_FUNCTION_ARGS)
> {
> ! PG_RETURN_INT32(text_length(PG_GETARG_DATUM(0)));
> ! }
>
> ! /*
> ! * text_length -
> ! * Does the real work for textlen()
> ! * This is broken out so it can be called directly by other string processing
> ! * functions.
> ! */
> ! static int32
> ! text_length(Datum str)
> ! {
> ! /* fastpath when max encoding length is one */
> ! if (pg_database_encoding_max_length() == 1)
> ! PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
> !
> ! if (pg_database_encoding_max_length() > 1)
> ! {
> ! text *t = DatumGetTextP(str);
> !
> ! PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
> ! VARSIZE(t) - VARHDRSZ));
> ! }
> !
> ! /* should never get here */
> ! elog(ERROR, "Invalid backend encoding; encoding max length "
> ! "is less than one.");
> !
> ! /* not reached: suppress compiler warning */
> ! return 0;
> }
>
> /*
> ***************
> *** 308,316 ****
> Datum
> textoctetlen(PG_FUNCTION_ARGS)
> {
> ! text *arg = PG_GETARG_TEXT_P(0);
> !
> ! PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ);
> }
>
> /*
> --- 353,359 ----
> Datum
> textoctetlen(PG_FUNCTION_ARGS)
> {
> ! PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
> }
>
> /*
> ***************
> *** 382,471 ****
> * - Thomas Lockhart 1998-12-10
> * Now uses faster TOAST-slicing interface
> * - John Gray 2002-02-22
> */
> Datum
> text_substr(PG_FUNCTION_ARGS)
> {
> ! text *string;
> ! int32 m = PG_GETARG_INT32(1);
> ! int32 n = PG_GETARG_INT32(2);
> ! int32 sm;
> ! int32 sn;
> ! int eml = 1;
> ! #ifdef MULTIBYTE
> ! int i;
> ! int len;
> ! text *ret;
> ! char *p;
> ! #endif
>
> ! /*
> ! * starting position before the start of the string? then offset into
> ! * the string per SQL92 spec...
> ! */
> ! if (m < 1)
> {
> ! n += (m - 1);
> ! m = 1;
> ! }
> ! /* Check for m > octet length is made in TOAST access routine */
>
> ! /* m will now become a zero-based starting position */
> ! sm = m - 1;
> ! sn = n;
>
> ! #ifdef MULTIBYTE
> ! eml = pg_database_encoding_max_length ();
>
> ! if (eml > 1)
> {
> ! sm = 0;
> ! if (n > -1)
> ! sn = (m + n) * eml + 3; /* +3 to avoid mb characters overhanging slice end */
> else
> ! sn = n; /* n < 0 is special-cased by heap_tuple_untoast_attr_slice */
> ! }
> ! #endif
>
> ! string = PG_GETARG_TEXT_P_SLICE (0, sm, sn);
>
> ! if (eml == 1)
> ! {
> ! PG_RETURN_TEXT_P (string);
> ! }
> ! #ifndef MULTIBYTE
> ! PG_RETURN_NULL(); /* notreached: suppress compiler warning */
> ! #endif
> ! #ifdef MULTIBYTE
> ! if (n > -1)
> ! len = pg_mbstrlen_with_len (VARDATA (string), sn - 3);
> ! else /* n < 0 is special-cased; need full string length */
> ! len = pg_mbstrlen_with_len (VARDATA (string), VARSIZE(string)-VARHDRSZ);
> !
> ! if (m > len)
> ! {
> ! m = 1;
> ! n = 0;
> ! }
> ! m--;
> ! if (((m + n) > len) || (n < 0))
> ! n = (len - m);
> !
> ! p = VARDATA(string);
> ! for (i = 0; i < m; i++)
> ! p += pg_mblen(p);
> ! m = p - VARDATA(string);
> ! for (i = 0; i < n; i++)
> ! p += pg_mblen(p);
> ! n = p - (VARDATA(string) + m);
>
> ! ret = (text *) palloc(VARHDRSZ + n);
> ! VARATT_SIZEP(ret) = VARHDRSZ + n;
>
> ! memcpy(VARDATA(ret), VARDATA(string) + m, n);
>
> ! PG_RETURN_TEXT_P(ret);
> ! #endif
> }
>
> /*
> --- 425,625 ----
> * - Thomas Lockhart 1998-12-10
> * Now uses faster TOAST-slicing interface
> * - John Gray 2002-02-22
> + * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
> + * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
> + * error; if E < 1, return '', not entire string). Fixed MB related bug when
> + * S > LC and < LC + 4 sometimes garbage characters are returned.
> + * - Joe Conway 2002-08-10
> */
> Datum
> text_substr(PG_FUNCTION_ARGS)
> {
> ! PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
> ! PG_GETARG_INT32(1),
> ! PG_GETARG_INT32(2),
> ! false));
> ! }
>
> ! /*
> ! * text_substr_no_len -
> ! * Wrapper to avoid opr_sanity failure due to
> ! * one function accepting a different number of args.
> ! */
> ! Datum
> ! text_substr_no_len(PG_FUNCTION_ARGS)
> ! {
> ! PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
> ! PG_GETARG_INT32(1),
> ! -1, true));
> ! }
> !
> ! /*
> ! * text_substring -
> ! * Does the real work for text_substr() and text_substr_no_len()
> ! * This is broken out so it can be called directly by other string processing
> ! * functions.
> ! */
> ! static text*
> ! text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
> ! {
> ! int32 eml = pg_database_encoding_max_length();
> ! int32 S = start; /* start position */
> ! int32 S1; /* adjusted start position */
> ! int32 L1; /* adjusted substring length */
> !
> ! /* life is easy if the encoding max length is 1 */
> ! if (eml == 1)
> {
> ! S1 = Max(S, 1);
>
> ! if (length_not_specified) /* special case - get length to end of string */
> ! L1 = -1;
> ! else
> ! {
> ! /* end position */
> ! int E = S + length;
>
> ! /*
> ! * A negative value for L is the only way for the end position
> ! * to be before the start. SQL99 says to throw an error.
> ! */
> ! if (E < S)
> ! elog(ERROR, "negative substring length not allowed");
>
> ! /*
> ! * A zero or negative value for the end position can happen if the start
> ! * was negative or one. SQL99 says to return a zero-length string.
> ! */
> ! if (E < 1)
> ! return PG_STR_GET_TEXT("");
> !
> ! L1 = E - S1;
> ! }
> !
> ! /*
> ! * If the start position is past the end of the string,
> ! * SQL99 says to return a zero-length string --
> ! * PG_GETARG_TEXT_P_SLICE() will do that for us.
> ! * Convert to zero-based starting position
> ! */
> ! return DatumGetTextPSlice(str, S1 - 1, L1);
> ! }
> ! else if (eml > 1)
> {
> ! /*
> ! * When encoding max length is > 1, we can't get LC without
> ! * detoasting, so we'll grab a conservatively large slice
> ! * now and go back later to do the right thing
> ! */
> ! int32 slice_start;
> ! int32 slice_size;
> ! int32 slice_strlen;
> ! text *slice;
> ! int32 E1;
> ! int32 i;
> ! char *p;
> ! char *s;
> ! text *ret;
> !
> ! /*
> ! * if S is past the end of the string, the tuple toaster
> ! * will return a zero-length string to us
> ! */
> ! S1 = Max(S, 1);
> !
> ! /*
> ! * We need to start at position zero because there is no
> ! * way to know in advance which byte offset corresponds to
> ! * the supplied start position.
> ! */
> ! slice_start = 0;
> !
> ! if (length_not_specified) /* special case - get length to end of string */
> ! slice_size = L1 = -1;
> else
> ! {
> ! int E = S + length;
> !
> ! /*
> ! * A negative value for L is the only way for the end position
> ! * to be before the start. SQL99 says to throw an error.
> ! */
> ! if (E < S)
> ! elog(ERROR, "negative substring length not allowed");
>
> ! /*
> ! * A zero or negative value for the end position can happen if the start
> ! * was negative or one. SQL99 says to return a zero-length string.
> ! */
> ! if (E < 1)
> ! return PG_STR_GET_TEXT("");
>
> ! /*
> ! * if E is past the end of the string, the tuple toaster
> ! * will truncate the length for us
> ! */
> ! L1 = E - S1;
> !
> ! /*
> ! * Total slice size in bytes can't be any longer than the start
> ! * position plus substring length times the encoding max length.
> ! */
> ! slice_size = (S1 + L1) * eml;
> ! }
> ! slice = DatumGetTextPSlice(str, slice_start, slice_size);
>
> ! /* see if we got back an empty string */
> ! if ((VARSIZE(slice) - VARHDRSZ) == 0)
> ! return PG_STR_GET_TEXT("");
>
> ! /* Now we can get the actual length of the slice in MB characters */
> ! slice_strlen = pg_mbstrlen_with_len (VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
>
> ! /* Check that the start position wasn't > slice_strlen. If so,
> ! * SQL99 says to return a zero-length string.
> ! */
> ! if (S1 > slice_strlen)
> ! return PG_STR_GET_TEXT("");
> !
> ! /*
> ! * Adjust L1 and E1 now that we know the slice string length.
> ! * Again remember that S1 is one based, and slice_start is zero based.
> ! */
> ! if (L1 > -1)
> ! E1 = Min(S1 + L1 , slice_start + 1 + slice_strlen);
> ! else
> ! E1 = slice_start + 1 + slice_strlen;
> !
> ! /*
> ! * Find the start position in the slice;
> ! * remember S1 is not zero based
> ! */
> ! p = VARDATA(slice);
> ! for (i = 0; i < S1 - 1; i++)
> ! p += pg_mblen(p);
> !
> ! /* hang onto a pointer to our start position */
> ! s = p;
> !
> ! /*
> ! * Count the actual bytes used by the substring of
> ! * the requested length.
> ! */
> ! for (i = S1; i < E1; i++)
> ! p += pg_mblen(p);
> !
> ! ret = (text *) palloc(VARHDRSZ + (p - s));
> ! VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
> ! memcpy(VARDATA(ret), s, (p - s));
> !
> ! return ret;
> ! }
> ! else
> ! elog(ERROR, "Invalid backend encoding; encoding max length "
> ! "is less than one.");
> !
> ! /* not reached: suppress compiler warning */
> ! return PG_STR_GET_TEXT("");
> }
>
> /*
> ***************
> *** 481,536 ****
> Datum
> textpos(PG_FUNCTION_ARGS)
> {
> ! text *t1 = PG_GETARG_TEXT_P(0);
> ! text *t2 = PG_GETARG_TEXT_P(1);
> ! int pos;
> ! int px,
> ! p;
> ! int len1,
> len2;
> - pg_wchar *p1,
> - *p2;
>
> ! #ifdef MULTIBYTE
> ! pg_wchar *ps1,
> ! *ps2;
> ! #endif
>
> if (VARSIZE(t2) <= VARHDRSZ)
> PG_RETURN_INT32(1); /* result for empty pattern */
>
> len1 = (VARSIZE(t1) - VARHDRSZ);
> len2 = (VARSIZE(t2) - VARHDRSZ);
> ! #ifdef MULTIBYTE
> ! ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
> ! (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
> ! len1 = pg_wchar_strlen(p1);
> ! ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
> ! (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
> ! len2 = pg_wchar_strlen(p2);
> ! #else
> ! p1 = VARDATA(t1);
> ! p2 = VARDATA(t2);
> ! #endif
> ! pos = 0;
> px = (len1 - len2);
> ! for (p = 0; p <= px; p++)
> {
> ! #ifdef MULTIBYTE
> ! if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
> ! #else
> ! if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
> ! #endif
> {
> ! pos = p + 1;
> ! break;
> ! };
> ! p1++;
> ! };
> ! #ifdef MULTIBYTE
> ! pfree(ps1);
> ! pfree(ps2);
> ! #endif
> PG_RETURN_INT32(pos);
> }
>
> --- 635,729 ----
> Datum
> textpos(PG_FUNCTION_ARGS)
> {
> ! PG_RETURN_INT32(text_position(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), 1));
> ! }
> !
> ! /*
> ! * text_position -
> ! * Does the real work for textpos()
> ! * This is broken out so it can be called directly by other string processing
> ! * functions.
> ! */
> ! static int32
> ! text_position(Datum str, Datum search_str, int matchnum)
> ! {
> ! int eml = pg_database_encoding_max_length();
> ! text *t1 = DatumGetTextP(str);
> ! text *t2 = DatumGetTextP(search_str);
> ! int match = 0,
> ! pos = 0,
> ! p = 0,
> ! px,
> ! len1,
> len2;
>
> ! if(matchnum == 0)
> ! return 0; /* result for 0th match */
>
> if (VARSIZE(t2) <= VARHDRSZ)
> PG_RETURN_INT32(1); /* result for empty pattern */
>
> len1 = (VARSIZE(t1) - VARHDRSZ);
> len2 = (VARSIZE(t2) - VARHDRSZ);
> !
> ! /* no use in searching str past point where search_str will fit */
> px = (len1 - len2);
> !
> ! if (eml == 1) /* simple case - single byte encoding */
> {
> ! char *p1,
> ! *p2;
> !
> ! p1 = VARDATA(t1);
> ! p2 = VARDATA(t2);
> !
> ! for (p = 0; p <= px; p++)
> {
> ! if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
> ! {
> ! if (++match == matchnum)
> ! {
> ! pos = p + 1;
> ! break;
> ! }
> ! }
> ! p1++;
> ! }
> ! }
> ! else if (eml > 1) /* not as simple - multibyte encoding */
> ! {
> ! pg_wchar *p1,
> ! *p2,
> ! *ps1,
> ! *ps2;
> !
> ! ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
> ! (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
> ! len1 = pg_wchar_strlen(p1);
> ! ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
> ! (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
> ! len2 = pg_wchar_strlen(p2);
> !
> ! for (p = 0; p <= px; p++)
> ! {
> ! if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
> ! {
> ! if (++match == matchnum)
> ! {
> ! pos = p + 1;
> ! break;
> ! }
> ! }
> ! p1++;
> ! }
> !
> ! pfree(ps1);
> ! pfree(ps2);
> ! }
> ! else
> ! elog(ERROR, "Invalid backend encoding; encoding max length "
> ! "is less than one.");
> !
> PG_RETURN_INT32(pos);
> }
>
> ***************
> *** 758,766 ****
> Datum
> byteaoctetlen(PG_FUNCTION_ARGS)
> {
> ! bytea *v = PG_GETARG_BYTEA_P(0);
> !
> ! PG_RETURN_INT32(VARSIZE(v) - VARHDRSZ);
> }
>
> /*
> --- 951,957 ----
> Datum
> byteaoctetlen(PG_FUNCTION_ARGS)
> {
> ! PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
> }
>
> /*
> ***************
> *** 805,810 ****
> --- 996,1003 ----
> PG_RETURN_BYTEA_P(result);
> }
>
> + #define PG_STR_GET_BYTEA(str_) \
> + DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
> /*
> * bytea_substr()
> * Return a substring starting at the specified position.
> ***************
> *** 813,845 ****
> * Input:
> * - string
> * - starting position (is one-based)
> ! * - string length
> *
> * If the starting position is zero or less, then return from the start of the string
> * adjusting the length to be consistent with the "negative start" per SQL92.
> ! * If the length is less than zero, return the remaining string.
> ! *
> */
> Datum
> bytea_substr(PG_FUNCTION_ARGS)
> {
> ! int32 m = PG_GETARG_INT32(1);
> ! int32 n = PG_GETARG_INT32(2);
>
> ! /*
> ! * starting position before the start of the string? then offset into
> ! * the string per SQL92 spec...
> ! */
> ! if (m < 1)
> {
> ! n += (m - 1);
> ! m = 1;
> }
>
> ! /* m will now become a zero-based starting position */
> ! m--;
>
> ! PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE (0, m, n));
> }
>
> /*
> --- 1006,1076 ----
> * Input:
> * - string
> * - starting position (is one-based)
> ! * - string length (optional)
> *
> * If the starting position is zero or less, then return from the start of the string
> * adjusting the length to be consistent with the "negative start" per SQL92.
> ! * If the length is less than zero, an ERROR is thrown. If no third argument
> ! * (length) is provided, the length to the end of the string is assumed.
> */
> Datum
> bytea_substr(PG_FUNCTION_ARGS)
> {
> ! int S = PG_GETARG_INT32(1); /* start position */
> ! int S1; /* adjusted start position */
> ! int L1; /* adjusted substring length */
>
> ! S1 = Max(S, 1);
> !
> ! if (fcinfo->nargs == 2)
> ! {
> ! /*
> ! * Not passed a length - PG_GETARG_BYTEA_P_SLICE()
> ! * grabs everything to the end of the string if we pass it
> ! * a negative value for length.
> ! */
> ! L1 = -1;
> ! }
> ! else
> {
> ! /* end position */
> ! int E = S + PG_GETARG_INT32(2);
> !
> ! /*
> ! * A negative value for L is the only way for the end position
> ! * to be before the start. SQL99 says to throw an error.
> ! */
> ! if (E < S)
> ! elog(ERROR, "negative substring length not allowed");
> !
> ! /*
> ! * A zero or negative value for the end position can happen if the start
> ! * was negative or one. SQL99 says to return a zero-length string.
> ! */
> ! if (E < 1)
> ! PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
> !
> ! L1 = E - S1;
> }
>
> ! /*
> ! * If the start position is past the end of the string,
> ! * SQL99 says to return a zero-length string --
> ! * PG_GETARG_TEXT_P_SLICE() will do that for us.
> ! * Convert to zero-based starting position
> ! */
> ! PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE (0, S1 - 1, L1));
> ! }
>
> ! /*
> ! * bytea_substr_no_len -
> ! * Wrapper to avoid opr_sanity failure due to
> ! * one function accepting a different number of args.
> ! */
> ! Datum
> ! bytea_substr_no_len(PG_FUNCTION_ARGS)
> ! {
> ! return bytea_substr(fcinfo);
> }
>
> /*
> ***************
> *** 1422,1424 ****
> --- 1653,1834 ----
>
> PG_RETURN_INT32(cmp);
> }
> +
> + /*
> + * replace_text
> + * replace all occurences of 'old_sub_str' in 'orig_str'
> + * with 'new_sub_str' to form 'new_str'
> + *
> + * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
> + * otherwise returns 'new_str'
> + */
> + Datum
> + replace_text(PG_FUNCTION_ARGS)
> + {
> + text *left_text;
> + text *right_text;
> + text *buf_text;
> + text *ret_text;
> + int curr_posn;
> + text *src_text = PG_GETARG_TEXT_P(0);
> + int src_text_len = TEXTLEN(src_text);
> + text *from_sub_text = PG_GETARG_TEXT_P(1);
> + int from_sub_text_len = TEXTLEN(from_sub_text);
> + text *to_sub_text = PG_GETARG_TEXT_P(2);
> + char *to_sub_str = PG_TEXT_GET_STR(to_sub_text);
> + StringInfo str = makeStringInfo();
> +
> + if (src_text_len == 0 || from_sub_text_len == 0)
> + PG_RETURN_TEXT_P(src_text);
> +
> + buf_text = TEXTDUP(src_text);
> + curr_posn = TEXTPOS(buf_text, from_sub_text);
> +
> + while (curr_posn > 0)
> + {
> + left_text = LEFT(buf_text, from_sub_text);
> + right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len);
> +
> + appendStringInfo(str, PG_TEXT_GET_STR(left_text));
> + appendStringInfo(str, to_sub_str);
> +
> + pfree(buf_text);
> + pfree(left_text);
> + buf_text = right_text;
> + curr_posn = TEXTPOS(buf_text, from_sub_text);
> + }
> +
> + appendStringInfo(str, PG_TEXT_GET_STR(buf_text));
> + pfree(buf_text);
> +
> + ret_text = PG_STR_GET_TEXT(str->data);
> + pfree(str->data);
> + pfree(str);
> +
> + PG_RETURN_TEXT_P(ret_text);
> + }
> +
> + /*
> + * split_text
> + * parse input string
> + * return ord item (1 based)
> + * based on provided field separator
> + */
> + Datum
> + split_text(PG_FUNCTION_ARGS)
> + {
> + text *inputstring = PG_GETARG_TEXT_P(0);
> + int inputstring_len = TEXTLEN(inputstring);
> + text *fldsep = PG_GETARG_TEXT_P(1);
> + int fldsep_len = TEXTLEN(fldsep);
> + int fldnum = PG_GETARG_INT32(2);
> + int start_posn = 0;
> + int end_posn = 0;
> + text *result_text;
> +
> + /* return empty string for empty input string */
> + if (inputstring_len < 1)
> + PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
> +
> + /* empty field separator */
> + if (fldsep_len < 1)
> + {
> + if (fldnum == 1) /* first field - just return the input string */
> + PG_RETURN_TEXT_P(inputstring);
> + else /* otherwise return an empty string */
> + PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
> + }
> +
> + /* field number is 1 based */
> + if (fldnum < 1)
> + elog(ERROR, "field position must be > 0");
> +
> + start_posn = text_position(PointerGetDatum(inputstring),
> + PointerGetDatum(fldsep),
> + fldnum - 1);
> + end_posn = text_position(PointerGetDatum(inputstring),
> + PointerGetDatum(fldsep),
> + fldnum);
> +
> + if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
> + {
> + if (fldnum == 1) /* first field - just return the input string */
> + PG_RETURN_TEXT_P(inputstring);
> + else /* otherwise return an empty string */
> + PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
> + }
> + else if ((start_posn != 0) && (end_posn == 0))
> + {
> + /* last field requested */
> + result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
> + PG_RETURN_TEXT_P(result_text);
> + }
> + else if ((start_posn == 0) && (end_posn != 0))
> + {
> + /* first field requested */
> + result_text = LEFT(inputstring, fldsep);
> + PG_RETURN_TEXT_P(result_text);
> + }
> + else
> + {
> + /* prior to last field requested */
> + result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false);
> + PG_RETURN_TEXT_P(result_text);
> + }
> + }
> +
> + #define HEXBASE 16
> + /*
> + * Convert a int32 to a string containing a base 16 (hex) representation of
> + * the number.
> + */
> + Datum
> + to_hex32(PG_FUNCTION_ARGS)
> + {
> + static char digits[] = "0123456789abcdef";
> + char buf[32]; /* bigger than needed, but reasonable */
> + char *ptr,
> + *end;
> + text *result_text;
> + int32 value = PG_GETARG_INT32(0);
> +
> + end = ptr = buf + sizeof(buf) - 1;
> + *ptr = '\0';
> +
> + do
> + {
> + *--ptr = digits[value % HEXBASE];
> + value /= HEXBASE;
> + } while (ptr > buf && value);
> +
> + result_text = PG_STR_GET_TEXT(ptr);
> + PG_RETURN_TEXT_P(result_text);
> + }
> +
> + /*
> + * Convert a int64 to a string containing a base 16 (hex) representation of
> + * the number.
> + */
> + Datum
> + to_hex64(PG_FUNCTION_ARGS)
> + {
> + static char digits[] = "0123456789abcdef";
> + char buf[32]; /* bigger than needed, but reasonable */
> + char *ptr,
> + *end;
> + text *result_text;
> + int64 value = PG_GETARG_INT64(0);
> +
> + end = ptr = buf + sizeof(buf) - 1;
> + *ptr = '\0';
> +
> + do
> + {
> + *--ptr = digits[value % HEXBASE];
> + value /= HEXBASE;
> + } while (ptr > buf && value);
> +
> + result_text = PG_STR_GET_TEXT(ptr);
> + PG_RETURN_TEXT_P(result_text);
> + }
> +
> Index: src/include/catalog/pg_proc.h
> ===================================================================
> RCS file: /opt/src/cvs/pgsql-server/src/include/catalog/pg_proc.h,v
> retrieving revision 1.254
> diff -c -r1.254 pg_proc.h
> *** src/include/catalog/pg_proc.h 15 Aug 2002 02:51:27 -0000 1.254
> --- src/include/catalog/pg_proc.h 16 Aug 2002 18:53:13 -0000
> ***************
> *** 2121,2127 ****
> DESCR("remove initial characters from string");
> DATA(insert OID = 882 ( rtrim PGNSP PGUID 14 f f t f i 1 25 "25" "select rtrim($1, \' \')" - _null_ ));
> DESCR("remove trailing characters from string");
> ! DATA(insert OID = 883 ( substr PGNSP PGUID 14 f f t f i 2 25 "25 23" "select substr($1, $2, -1)" - _null_ ));
> DESCR("return portion of string");
> DATA(insert OID = 884 ( btrim PGNSP PGUID 12 f f t f i 2 25 "25 25" btrim - _null_ ));
> DESCR("trim both ends of string");
> --- 2121,2127 ----
> DESCR("remove initial characters from string");
> DATA(insert OID = 882 ( rtrim PGNSP PGUID 14 f f t f i 1 25 "25" "select rtrim($1, \' \')" - _null_ ));
> DESCR("remove trailing characters from string");
> ! DATA(insert OID = 883 ( substr PGNSP PGUID 12 f f t f i 2 25 "25 23" text_substr_no_len - _null_ ));
> DESCR("return portion of string");
> DATA(insert OID = 884 ( btrim PGNSP PGUID 12 f f t f i 2 25 "25 25" btrim - _null_ ));
> DESCR("trim both ends of string");
> ***************
> *** 2130,2137 ****
>
> DATA(insert OID = 936 ( substring PGNSP PGUID 12 f f t f i 3 25 "25 23 23" text_substr - _null_ ));
> DESCR("return portion of string");
> ! DATA(insert OID = 937 ( substring PGNSP PGUID 14 f f t f i 2 25 "25 23" "select substring($1, $2, -1)" - _null_ ));
> DESCR("return portion of string");
>
> /* for multi-byte support */
>
> --- 2130,2145 ----
>
> DATA(insert OID = 936 ( substring PGNSP PGUID 12 f f t f i 3 25 "25 23 23" text_substr - _null_ ));
> DESCR("return portion of string");
> ! DATA(insert OID = 937 ( substring PGNSP PGUID 12 f f t f i 2 25 "25 23" text_substr_no_len - _null_ ));
> DESCR("return portion of string");
> + DATA(insert OID = 2087 ( replace PGNSP PGUID 12 f f t f i 3 25 "25 25 25" replace_text - _null_ ));
> + DESCR("replace all occurrences of old_substr with new_substr in string");
> + DATA(insert OID = 2088 ( split PGNSP PGUID 12 f f t f i 3 25 "25 25 23" split_text - _null_ ));
> + DESCR("split string by field_sep and return field_num");
> + DATA(insert OID = 2089 ( to_hex PGNSP PGUID 12 f f t f i 1 25 "23" to_hex32 - _null_ ));
> + DESCR("convert int32 number to hex");
> + DATA(insert OID = 2090 ( to_hex PGNSP PGUID 12 f f t f i 1 25 "20" to_hex64 - _null_ ));
> + DESCR("convert int64 number to hex");
>
> /* for multi-byte support */
>
> ***************
> *** 2778,2784 ****
> DESCR("concatenate");
> DATA(insert OID = 2012 ( substring PGNSP PGUID 12 f f t f i 3 17 "17 23 23" bytea_substr - _null_ ));
> DESCR("return portion of string");
> ! DATA(insert OID = 2013 ( substring PGNSP PGUID 14 f f t f i 2 17 "17 23" "select substring($1, $2, -1)" - _null_ ));
> DESCR("return portion of string");
> DATA(insert OID = 2014 ( position PGNSP PGUID 12 f f t f i 2 23 "17 17" byteapos - _null_ ));
> DESCR("return position of substring");
> --- 2786,2796 ----
> DESCR("concatenate");
> DATA(insert OID = 2012 ( substring PGNSP PGUID 12 f f t f i 3 17 "17 23 23" bytea_substr - _null_ ));
> DESCR("return portion of string");
> ! DATA(insert OID = 2013 ( substring PGNSP PGUID 12 f f t f i 2 17 "17 23" bytea_substr_no_len - _null_ ));
> ! DESCR("return portion of string");
> ! DATA(insert OID = 2085 ( substr PGNSP PGUID 12 f f t f i 3 17 "17 23 23" bytea_substr - _null_ ));
> ! DESCR("return portion of string");
> ! DATA(insert OID = 2086 ( substr PGNSP PGUID 12 f f t f i 2 17 "17 23" bytea_substr_no_len - _null_ ));
> DESCR("return portion of string");
> DATA(insert OID = 2014 ( position PGNSP PGUID 12 f f t f i 2 23 "17 17" byteapos - _null_ ));
> DESCR("return position of substring");
> Index: src/include/utils/builtins.h
> ===================================================================
> RCS file: /opt/src/cvs/pgsql-server/src/include/utils/builtins.h,v
> retrieving revision 1.191
> diff -c -r1.191 builtins.h
> *** src/include/utils/builtins.h 15 Aug 2002 02:51:27 -0000 1.191
> --- src/include/utils/builtins.h 16 Aug 2002 18:53:13 -0000
> ***************
> *** 447,458 ****
> --- 447,463 ----
> extern Datum textoctetlen(PG_FUNCTION_ARGS);
> extern Datum textpos(PG_FUNCTION_ARGS);
> extern Datum text_substr(PG_FUNCTION_ARGS);
> + extern Datum text_substr_no_len(PG_FUNCTION_ARGS);
> extern Datum name_text(PG_FUNCTION_ARGS);
> extern Datum text_name(PG_FUNCTION_ARGS);
> extern int varstr_cmp(char *arg1, int len1, char *arg2, int len2);
> extern List *textToQualifiedNameList(text *textval, const char *caller);
> extern bool SplitIdentifierString(char *rawstring, char separator,
> List **namelist);
> + extern Datum replace_text(PG_FUNCTION_ARGS);
> + extern Datum split_text(PG_FUNCTION_ARGS);
> + extern Datum to_hex32(PG_FUNCTION_ARGS);
> + extern Datum to_hex64(PG_FUNCTION_ARGS);
>
> extern Datum unknownin(PG_FUNCTION_ARGS);
> extern Datum unknownout(PG_FUNCTION_ARGS);
> ***************
> *** 476,481 ****
> --- 481,487 ----
> extern Datum byteacat(PG_FUNCTION_ARGS);
> extern Datum byteapos(PG_FUNCTION_ARGS);
> extern Datum bytea_substr(PG_FUNCTION_ARGS);
> + extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS);
>
> /* version.c */
> extern Datum pgsql_version(PG_FUNCTION_ARGS);
> Index: src/test/regress/expected/strings.out
> ===================================================================
> RCS file: /opt/src/cvs/pgsql-server/src/test/regress/expected/strings.out,v
> retrieving revision 1.12
> diff -c -r1.12 strings.out
> *** src/test/regress/expected/strings.out 11 Jun 2002 15:41:38 -0000 1.12
> --- src/test/regress/expected/strings.out 16 Aug 2002 18:53:13 -0000
> ***************
> *** 573,575 ****
> --- 573,738 ----
> text and varchar
> (1 row)
>
> + --
> + -- test substr with toasted text values
> + --
> + CREATE TABLE toasttest(f1 text);
> + insert into toasttest values(repeat('1234567890',10000));
> + insert into toasttest values(repeat('1234567890',10000));
> + -- If the starting position is zero or less, then return from the start of the string
> + -- adjusting the length to be consistent with the "negative start" per SQL92.
> + SELECT substr(f1, -1, 5) from toasttest;
> + substr
> + --------
> + 123
> + 123
> + (2 rows)
> +
> + -- If the length is less than zero, an ERROR is thrown.
> + SELECT substr(f1, 5, -1) from toasttest;
> + ERROR: negative substring length not allowed
> + -- If no third argument (length) is provided, the length to the end of the
> + -- string is assumed.
> + SELECT substr(f1, 99995) from toasttest;
> + substr
> + --------
> + 567890
> + 567890
> + (2 rows)
> +
> + -- If start plus length is > string length, the result is truncated to
> + -- string length
> + SELECT substr(f1, 99995, 10) from toasttest;
> + substr
> + --------
> + 567890
> + 567890
> + (2 rows)
> +
> + DROP TABLE toasttest;
> + --
> + -- test substr with toasted bytea values
> + --
> + CREATE TABLE toasttest(f1 bytea);
> + insert into toasttest values(decode(repeat('1234567890',10000),'escape'));
> + insert into toasttest values(decode(repeat('1234567890',10000),'escape'));
> + -- If the starting position is zero or less, then return from the start of the string
> + -- adjusting the length to be consistent with the "negative start" per SQL92.
> + SELECT substr(f1, -1, 5) from toasttest;
> + substr
> + --------
> + 123
> + 123
> + (2 rows)
> +
> + -- If the length is less than zero, an ERROR is thrown.
> + SELECT substr(f1, 5, -1) from toasttest;
> + ERROR: negative substring length not allowed
> + -- If no third argument (length) is provided, the length to the end of the
> + -- string is assumed.
> + SELECT substr(f1, 99995) from toasttest;
> + substr
> + --------
> + 567890
> + 567890
> + (2 rows)
> +
> + -- If start plus length is > string length, the result is truncated to
> + -- string length
> + SELECT substr(f1, 99995, 10) from toasttest;
> + substr
> + --------
> + 567890
> + 567890
> + (2 rows)
> +
> + DROP TABLE toasttest;
> + --
> + -- test length
> + --
> + SELECT length('abcdef') AS "length_6";
> + length_6
> + ----------
> + 6
> + (1 row)
> +
> + --
> + -- test strpos
> + --
> + SELECT strpos('abcdef', 'cd') AS "pos_3";
> + pos_3
> + -------
> + 3
> + (1 row)
> +
> + SELECT strpos('abcdef', 'xy') AS "pos_0";
> + pos_0
> + -------
> + 0
> + (1 row)
> +
> + --
> + -- test replace
> + --
> + SELECT replace('abcdef', 'de', '45') AS "abc45f";
> + abc45f
> + --------
> + abc45f
> + (1 row)
> +
> + SELECT replace('yabadabadoo', 'ba', '123') AS "ya123da123doo";
> + ya123da123doo
> + ---------------
> + ya123da123doo
> + (1 row)
> +
> + SELECT replace('yabadoo', 'bad', '') AS "yaoo";
> + yaoo
> + ------
> + yaoo
> + (1 row)
> +
> + --
> + -- test split
> + --
> + select split('joeuser(at)mydatabase','@',0) AS "an error";
> + ERROR: field position must be > 0
> + select split('joeuser(at)mydatabase','@',1) AS "joeuser";
> + joeuser
> + ---------
> + joeuser
> + (1 row)
> +
> + select split('joeuser(at)mydatabase','@',2) AS "mydatabase";
> + mydatabase
> + ------------
> + mydatabase
> + (1 row)
> +
> + select split('joeuser(at)mydatabase','@',3) AS "empty string";
> + empty string
> + --------------
> +
> + (1 row)
> +
> + select split('@joeuser(at)mydatabase@','@',2) AS "joeuser";
> + joeuser
> + ---------
> + joeuser
> + (1 row)
> +
> + --
> + -- test to_hex
> + --
> + select to_hex(256*256*256 - 1) AS "ffffff";
> + ffffff
> + --------
> + ffffff
> + (1 row)
> +
> + select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff";
> + ffffffff
> + ----------
> + ffffffff
> + (1 row)
> +
> Index: src/test/regress/sql/strings.sql
> ===================================================================
> RCS file: /opt/src/cvs/pgsql-server/src/test/regress/sql/strings.sql,v
> retrieving revision 1.8
> diff -c -r1.8 strings.sql
> *** src/test/regress/sql/strings.sql 11 Jun 2002 15:41:38 -0000 1.8
> --- src/test/regress/sql/strings.sql 16 Aug 2002 18:53:13 -0000
> ***************
> *** 197,199 ****
> --- 197,292 ----
> SELECT text 'text' || char(20) ' and characters' AS "Concat text to char";
>
> SELECT text 'text' || varchar ' and varchar' AS "Concat text to varchar";
> +
> + --
> + -- test substr with toasted text values
> + --
> + CREATE TABLE toasttest(f1 text);
> +
> + insert into toasttest values(repeat('1234567890',10000));
> + insert into toasttest values(repeat('1234567890',10000));
> +
> + -- If the starting position is zero or less, then return from the start of the string
> + -- adjusting the length to be consistent with the "negative start" per SQL92.
> + SELECT substr(f1, -1, 5) from toasttest;
> +
> + -- If the length is less than zero, an ERROR is thrown.
> + SELECT substr(f1, 5, -1) from toasttest;
> +
> + -- If no third argument (length) is provided, the length to the end of the
> + -- string is assumed.
> + SELECT substr(f1, 99995) from toasttest;
> +
> + -- If start plus length is > string length, the result is truncated to
> + -- string length
> + SELECT substr(f1, 99995, 10) from toasttest;
> +
> + DROP TABLE toasttest;
> +
> + --
> + -- test substr with toasted bytea values
> + --
> + CREATE TABLE toasttest(f1 bytea);
> +
> + insert into toasttest values(decode(repeat('1234567890',10000),'escape'));
> + insert into toasttest values(decode(repeat('1234567890',10000),'escape'));
> +
> + -- If the starting position is zero or less, then return from the start of the string
> + -- adjusting the length to be consistent with the "negative start" per SQL92.
> + SELECT substr(f1, -1, 5) from toasttest;
> +
> + -- If the length is less than zero, an ERROR is thrown.
> + SELECT substr(f1, 5, -1) from toasttest;
> +
> + -- If no third argument (length) is provided, the length to the end of the
> + -- string is assumed.
> + SELECT substr(f1, 99995) from toasttest;
> +
> + -- If start plus length is > string length, the result is truncated to
> + -- string length
> + SELECT substr(f1, 99995, 10) from toasttest;
> +
> + DROP TABLE toasttest;
> +
> + --
> + -- test length
> + --
> +
> + SELECT length('abcdef') AS "length_6";
> +
> + --
> + -- test strpos
> + --
> +
> + SELECT strpos('abcdef', 'cd') AS "pos_3";
> +
> + SELECT strpos('abcdef', 'xy') AS "pos_0";
> +
> + --
> + -- test replace
> + --
> + SELECT replace('abcdef', 'de', '45') AS "abc45f";
> +
> + SELECT replace('yabadabadoo', 'ba', '123') AS "ya123da123doo";
> +
> + SELECT replace('yabadoo', 'bad', '') AS "yaoo";
> +
> + --
> + -- test split
> + --
> + select split('joeuser(at)mydatabase','@',0) AS "an error";
> +
> + select split('joeuser(at)mydatabase','@',1) AS "joeuser";
> +
> + select split('joeuser(at)mydatabase','@',2) AS "mydatabase";
> +
> + select split('joeuser(at)mydatabase','@',3) AS "empty string";
> +
> + select split('@joeuser(at)mydatabase@','@',2) AS "joeuser";
> +
> + --
> + -- test to_hex
> + --
> + select to_hex(256*256*256 - 1) AS "ffffff";
> +
> + select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff";

>
> ---------------------------(end of broadcast)---------------------------
> TIP 5: Have you checked our extensive FAQ?
>
> http://www.postgresql.org/users-lounge/docs/faq.html

--
Bruce Momjian | http://candle.pha.pa.us
pgman(at)candle(dot)pha(dot)pa(dot)us | (610) 359-1001
+ If your life is a hard drive, | 13 Roberts Road
+ Christ can be your backup. | Newtown Square, Pennsylvania 19073

In response to

Browse pgsql-general by date

  From Date Subject
Next Message Tom Lane 2002-08-22 03:55:12 Re: Default values, inserts, and rules...
Previous Message Mark Stosberg 2002-08-22 02:20:54 Re: Event recurrence - in database or in application code ????

Browse pgsql-hackers by date

  From Date Subject
Next Message Bruce Momjian 2002-08-22 03:25:14 Re: libpq++ documentation ...
Previous Message Marc G. Fournier 2002-08-22 03:21:39 Re: libpq++ documentation ...

Browse pgsql-patches by date

  From Date Subject
Next Message Bruce Momjian 2002-08-22 04:47:17 Re: bytea operator bugs (was Re: [GENERAL] BYTEA, indexes
Previous Message Peter Eisentraut 2002-08-21 20:46:18 Re: swedish translation