Index: src/backend/utils/adt/varlena.c =================================================================== RCS file: /opt/src/cvs/pgsql-server/src/backend/utils/adt/varlena.c,v retrieving revision 1.87 diff -c -r1.87 varlena.c *** src/backend/utils/adt/varlena.c 4 Aug 2002 06:44:47 -0000 1.87 --- src/backend/utils/adt/varlena.c 16 Aug 2002 19:54:03 -0000 *************** *** 18,23 **** --- 18,25 ---- #include "mb/pg_wchar.h" #include "miscadmin.h" + #include "access/tuptoaster.h" + #include "lib/stringinfo.h" #include "utils/builtins.h" #include "utils/pg_locale.h" *************** *** 27,34 **** --- 29,62 ---- #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X)) #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n)) #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x) + #define PG_TEXTARG_GET_STR(arg_) \ + DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_))) + #define PG_TEXT_GET_STR(textp_) \ + DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_))) + #define PG_STR_GET_TEXT(str_) \ + DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_))) + #define TEXTLEN(textp) \ + text_length(PointerGetDatum(textp)) + #define TEXTPOS(buf_text, from_sub_text) \ + text_position(PointerGetDatum(buf_text), PointerGetDatum(from_sub_text), 1) + #define TEXTDUP(textp) \ + DatumGetTextPCopy(PointerGetDatum(textp)) + #define LEFT(buf_text, from_sub_text) \ + text_substring(PointerGetDatum(buf_text), \ + 1, \ + TEXTPOS(buf_text, from_sub_text) - 1, false) + #define RIGHT(buf_text, from_sub_text, from_sub_text_len) \ + text_substring(PointerGetDatum(buf_text), \ + TEXTPOS(buf_text, from_sub_text) + from_sub_text_len, \ + -1, true) static int text_cmp(text *arg1, text *arg2); + static int32 text_length(Datum str); + static int32 text_position(Datum str, Datum search_str, int matchnum); + static text *text_substring(Datum str, + int32 start, + int32 length, + bool length_not_specified); /***************************************************************************** *************** *** 285,303 **** Datum textlen(PG_FUNCTION_ARGS) { ! text *t = PG_GETARG_TEXT_P(0); ! #ifdef MULTIBYTE ! /* optimization for single byte encoding */ ! if (pg_database_encoding_max_length() <= 1) ! PG_RETURN_INT32(VARSIZE(t) - VARHDRSZ); ! ! PG_RETURN_INT32( ! pg_mbstrlen_with_len(VARDATA(t), VARSIZE(t) - VARHDRSZ) ! ); ! #else ! PG_RETURN_INT32(VARSIZE(t) - VARHDRSZ); ! #endif } /* --- 313,348 ---- Datum textlen(PG_FUNCTION_ARGS) { ! PG_RETURN_INT32(text_length(PG_GETARG_DATUM(0))); ! } ! /* ! * text_length - ! * Does the real work for textlen() ! * This is broken out so it can be called directly by other string processing ! * functions. ! */ ! static int32 ! text_length(Datum str) ! { ! /* fastpath when max encoding length is one */ ! if (pg_database_encoding_max_length() == 1) ! PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ); ! ! if (pg_database_encoding_max_length() > 1) ! { ! text *t = DatumGetTextP(str); ! ! PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t), ! VARSIZE(t) - VARHDRSZ)); ! } ! ! /* should never get here */ ! elog(ERROR, "Invalid backend encoding; encoding max length " ! "is less than one."); ! ! /* not reached: suppress compiler warning */ ! return 0; } /* *************** *** 308,316 **** Datum textoctetlen(PG_FUNCTION_ARGS) { ! text *arg = PG_GETARG_TEXT_P(0); ! ! PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ); } /* --- 353,359 ---- Datum textoctetlen(PG_FUNCTION_ARGS) { ! PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ); } /* *************** *** 382,471 **** * - Thomas Lockhart 1998-12-10 * Now uses faster TOAST-slicing interface * - John Gray 2002-02-22 */ Datum text_substr(PG_FUNCTION_ARGS) { ! text *string; ! int32 m = PG_GETARG_INT32(1); ! int32 n = PG_GETARG_INT32(2); ! int32 sm; ! int32 sn; ! int eml = 1; ! #ifdef MULTIBYTE ! int i; ! int len; ! text *ret; ! char *p; ! #endif ! /* ! * starting position before the start of the string? then offset into ! * the string per SQL92 spec... ! */ ! if (m < 1) { ! n += (m - 1); ! m = 1; ! } ! /* Check for m > octet length is made in TOAST access routine */ ! /* m will now become a zero-based starting position */ ! sm = m - 1; ! sn = n; ! #ifdef MULTIBYTE ! eml = pg_database_encoding_max_length (); ! if (eml > 1) { ! sm = 0; ! if (n > -1) ! sn = (m + n) * eml + 3; /* +3 to avoid mb characters overhanging slice end */ else ! sn = n; /* n < 0 is special-cased by heap_tuple_untoast_attr_slice */ ! } ! #endif ! string = PG_GETARG_TEXT_P_SLICE (0, sm, sn); ! if (eml == 1) ! { ! PG_RETURN_TEXT_P (string); ! } ! #ifndef MULTIBYTE ! PG_RETURN_NULL(); /* notreached: suppress compiler warning */ ! #endif ! #ifdef MULTIBYTE ! if (n > -1) ! len = pg_mbstrlen_with_len (VARDATA (string), sn - 3); ! else /* n < 0 is special-cased; need full string length */ ! len = pg_mbstrlen_with_len (VARDATA (string), VARSIZE(string)-VARHDRSZ); ! ! if (m > len) ! { ! m = 1; ! n = 0; ! } ! m--; ! if (((m + n) > len) || (n < 0)) ! n = (len - m); ! ! p = VARDATA(string); ! for (i = 0; i < m; i++) ! p += pg_mblen(p); ! m = p - VARDATA(string); ! for (i = 0; i < n; i++) ! p += pg_mblen(p); ! n = p - (VARDATA(string) + m); ! ret = (text *) palloc(VARHDRSZ + n); ! VARATT_SIZEP(ret) = VARHDRSZ + n; ! memcpy(VARDATA(ret), VARDATA(string) + m, n); ! PG_RETURN_TEXT_P(ret); ! #endif } /* --- 425,625 ---- * - Thomas Lockhart 1998-12-10 * Now uses faster TOAST-slicing interface * - John Gray 2002-02-22 + * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change + * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw + * error; if E < 1, return '', not entire string). Fixed MB related bug when + * S > LC and < LC + 4 sometimes garbage characters are returned. + * - Joe Conway 2002-08-10 */ Datum text_substr(PG_FUNCTION_ARGS) { ! PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), ! PG_GETARG_INT32(1), ! PG_GETARG_INT32(2), ! false)); ! } ! /* ! * text_substr_no_len - ! * Wrapper to avoid opr_sanity failure due to ! * one function accepting a different number of args. ! */ ! Datum ! text_substr_no_len(PG_FUNCTION_ARGS) ! { ! PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), ! PG_GETARG_INT32(1), ! -1, true)); ! } ! ! /* ! * text_substring - ! * Does the real work for text_substr() and text_substr_no_len() ! * This is broken out so it can be called directly by other string processing ! * functions. ! */ ! static text* ! text_substring(Datum str, int32 start, int32 length, bool length_not_specified) ! { ! int32 eml = pg_database_encoding_max_length(); ! int32 S = start; /* start position */ ! int32 S1; /* adjusted start position */ ! int32 L1; /* adjusted substring length */ ! ! /* life is easy if the encoding max length is 1 */ ! if (eml == 1) { ! S1 = Max(S, 1); ! if (length_not_specified) /* special case - get length to end of string */ ! L1 = -1; ! else ! { ! /* end position */ ! int E = S + length; ! /* ! * A negative value for L is the only way for the end position ! * to be before the start. SQL99 says to throw an error. ! */ ! if (E < S) ! elog(ERROR, "negative substring length not allowed"); ! /* ! * A zero or negative value for the end position can happen if the start ! * was negative or one. SQL99 says to return a zero-length string. ! */ ! if (E < 1) ! return PG_STR_GET_TEXT(""); ! ! L1 = E - S1; ! } ! ! /* ! * If the start position is past the end of the string, ! * SQL99 says to return a zero-length string -- ! * PG_GETARG_TEXT_P_SLICE() will do that for us. ! * Convert to zero-based starting position ! */ ! return DatumGetTextPSlice(str, S1 - 1, L1); ! } ! else if (eml > 1) { ! /* ! * When encoding max length is > 1, we can't get LC without ! * detoasting, so we'll grab a conservatively large slice ! * now and go back later to do the right thing ! */ ! int32 slice_start; ! int32 slice_size; ! int32 slice_strlen; ! text *slice; ! int32 E1; ! int32 i; ! char *p; ! char *s; ! text *ret; ! ! /* ! * if S is past the end of the string, the tuple toaster ! * will return a zero-length string to us ! */ ! S1 = Max(S, 1); ! ! /* ! * We need to start at position zero because there is no ! * way to know in advance which byte offset corresponds to ! * the supplied start position. ! */ ! slice_start = 0; ! ! if (length_not_specified) /* special case - get length to end of string */ ! slice_size = L1 = -1; else ! { ! int E = S + length; ! ! /* ! * A negative value for L is the only way for the end position ! * to be before the start. SQL99 says to throw an error. ! */ ! if (E < S) ! elog(ERROR, "negative substring length not allowed"); ! /* ! * A zero or negative value for the end position can happen if the start ! * was negative or one. SQL99 says to return a zero-length string. ! */ ! if (E < 1) ! return PG_STR_GET_TEXT(""); ! /* ! * if E is past the end of the string, the tuple toaster ! * will truncate the length for us ! */ ! L1 = E - S1; ! ! /* ! * Total slice size in bytes can't be any longer than the start ! * position plus substring length times the encoding max length. ! */ ! slice_size = (S1 + L1) * eml; ! } ! slice = DatumGetTextPSlice(str, slice_start, slice_size); ! /* see if we got back an empty string */ ! if ((VARSIZE(slice) - VARHDRSZ) == 0) ! return PG_STR_GET_TEXT(""); ! /* Now we can get the actual length of the slice in MB characters */ ! slice_strlen = pg_mbstrlen_with_len (VARDATA(slice), VARSIZE(slice) - VARHDRSZ); ! /* Check that the start position wasn't > slice_strlen. If so, ! * SQL99 says to return a zero-length string. ! */ ! if (S1 > slice_strlen) ! return PG_STR_GET_TEXT(""); ! ! /* ! * Adjust L1 and E1 now that we know the slice string length. ! * Again remember that S1 is one based, and slice_start is zero based. ! */ ! if (L1 > -1) ! E1 = Min(S1 + L1 , slice_start + 1 + slice_strlen); ! else ! E1 = slice_start + 1 + slice_strlen; ! ! /* ! * Find the start position in the slice; ! * remember S1 is not zero based ! */ ! p = VARDATA(slice); ! for (i = 0; i < S1 - 1; i++) ! p += pg_mblen(p); ! ! /* hang onto a pointer to our start position */ ! s = p; ! ! /* ! * Count the actual bytes used by the substring of ! * the requested length. ! */ ! for (i = S1; i < E1; i++) ! p += pg_mblen(p); ! ! ret = (text *) palloc(VARHDRSZ + (p - s)); ! VARATT_SIZEP(ret) = VARHDRSZ + (p - s); ! memcpy(VARDATA(ret), s, (p - s)); ! ! return ret; ! } ! else ! elog(ERROR, "Invalid backend encoding; encoding max length " ! "is less than one."); ! ! /* not reached: suppress compiler warning */ ! return PG_STR_GET_TEXT(""); } /* *************** *** 481,536 **** Datum textpos(PG_FUNCTION_ARGS) { ! text *t1 = PG_GETARG_TEXT_P(0); ! text *t2 = PG_GETARG_TEXT_P(1); ! int pos; ! int px, ! p; ! int len1, len2; - pg_wchar *p1, - *p2; ! #ifdef MULTIBYTE ! pg_wchar *ps1, ! *ps2; ! #endif if (VARSIZE(t2) <= VARHDRSZ) PG_RETURN_INT32(1); /* result for empty pattern */ len1 = (VARSIZE(t1) - VARHDRSZ); len2 = (VARSIZE(t2) - VARHDRSZ); ! #ifdef MULTIBYTE ! ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar)); ! (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1); ! len1 = pg_wchar_strlen(p1); ! ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar)); ! (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2); ! len2 = pg_wchar_strlen(p2); ! #else ! p1 = VARDATA(t1); ! p2 = VARDATA(t2); ! #endif ! pos = 0; px = (len1 - len2); ! for (p = 0; p <= px; p++) { ! #ifdef MULTIBYTE ! if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0)) ! #else ! if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0)) ! #endif { ! pos = p + 1; ! break; ! }; ! p1++; ! }; ! #ifdef MULTIBYTE ! pfree(ps1); ! pfree(ps2); ! #endif PG_RETURN_INT32(pos); } --- 635,729 ---- Datum textpos(PG_FUNCTION_ARGS) { ! PG_RETURN_INT32(text_position(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), 1)); ! } ! ! /* ! * text_position - ! * Does the real work for textpos() ! * This is broken out so it can be called directly by other string processing ! * functions. ! */ ! static int32 ! text_position(Datum str, Datum search_str, int matchnum) ! { ! int eml = pg_database_encoding_max_length(); ! text *t1 = DatumGetTextP(str); ! text *t2 = DatumGetTextP(search_str); ! int match = 0, ! pos = 0, ! p = 0, ! px, ! len1, len2; ! if(matchnum == 0) ! return 0; /* result for 0th match */ if (VARSIZE(t2) <= VARHDRSZ) PG_RETURN_INT32(1); /* result for empty pattern */ len1 = (VARSIZE(t1) - VARHDRSZ); len2 = (VARSIZE(t2) - VARHDRSZ); ! ! /* no use in searching str past point where search_str will fit */ px = (len1 - len2); ! ! if (eml == 1) /* simple case - single byte encoding */ { ! char *p1, ! *p2; ! ! p1 = VARDATA(t1); ! p2 = VARDATA(t2); ! ! for (p = 0; p <= px; p++) { ! if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0)) ! { ! if (++match == matchnum) ! { ! pos = p + 1; ! break; ! } ! } ! p1++; ! } ! } ! else if (eml > 1) /* not as simple - multibyte encoding */ ! { ! pg_wchar *p1, ! *p2, ! *ps1, ! *ps2; ! ! ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar)); ! (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1); ! len1 = pg_wchar_strlen(p1); ! ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar)); ! (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2); ! len2 = pg_wchar_strlen(p2); ! ! for (p = 0; p <= px; p++) ! { ! if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0)) ! { ! if (++match == matchnum) ! { ! pos = p + 1; ! break; ! } ! } ! p1++; ! } ! ! pfree(ps1); ! pfree(ps2); ! } ! else ! elog(ERROR, "Invalid backend encoding; encoding max length " ! "is less than one."); ! PG_RETURN_INT32(pos); } *************** *** 758,766 **** Datum byteaoctetlen(PG_FUNCTION_ARGS) { ! bytea *v = PG_GETARG_BYTEA_P(0); ! ! PG_RETURN_INT32(VARSIZE(v) - VARHDRSZ); } /* --- 951,957 ---- Datum byteaoctetlen(PG_FUNCTION_ARGS) { ! PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ); } /* *************** *** 805,810 **** --- 996,1003 ---- PG_RETURN_BYTEA_P(result); } + #define PG_STR_GET_BYTEA(str_) \ + DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_))) /* * bytea_substr() * Return a substring starting at the specified position. *************** *** 813,845 **** * Input: * - string * - starting position (is one-based) ! * - string length * * If the starting position is zero or less, then return from the start of the string * adjusting the length to be consistent with the "negative start" per SQL92. ! * If the length is less than zero, return the remaining string. ! * */ Datum bytea_substr(PG_FUNCTION_ARGS) { ! int32 m = PG_GETARG_INT32(1); ! int32 n = PG_GETARG_INT32(2); ! /* ! * starting position before the start of the string? then offset into ! * the string per SQL92 spec... ! */ ! if (m < 1) { ! n += (m - 1); ! m = 1; } ! /* m will now become a zero-based starting position */ ! m--; ! PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE (0, m, n)); } /* --- 1006,1076 ---- * Input: * - string * - starting position (is one-based) ! * - string length (optional) * * If the starting position is zero or less, then return from the start of the string * adjusting the length to be consistent with the "negative start" per SQL92. ! * If the length is less than zero, an ERROR is thrown. If no third argument ! * (length) is provided, the length to the end of the string is assumed. */ Datum bytea_substr(PG_FUNCTION_ARGS) { ! int S = PG_GETARG_INT32(1); /* start position */ ! int S1; /* adjusted start position */ ! int L1; /* adjusted substring length */ ! S1 = Max(S, 1); ! ! if (fcinfo->nargs == 2) ! { ! /* ! * Not passed a length - PG_GETARG_BYTEA_P_SLICE() ! * grabs everything to the end of the string if we pass it ! * a negative value for length. ! */ ! L1 = -1; ! } ! else { ! /* end position */ ! int E = S + PG_GETARG_INT32(2); ! ! /* ! * A negative value for L is the only way for the end position ! * to be before the start. SQL99 says to throw an error. ! */ ! if (E < S) ! elog(ERROR, "negative substring length not allowed"); ! ! /* ! * A zero or negative value for the end position can happen if the start ! * was negative or one. SQL99 says to return a zero-length string. ! */ ! if (E < 1) ! PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA("")); ! ! L1 = E - S1; } ! /* ! * If the start position is past the end of the string, ! * SQL99 says to return a zero-length string -- ! * PG_GETARG_TEXT_P_SLICE() will do that for us. ! * Convert to zero-based starting position ! */ ! PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE (0, S1 - 1, L1)); ! } ! /* ! * bytea_substr_no_len - ! * Wrapper to avoid opr_sanity failure due to ! * one function accepting a different number of args. ! */ ! Datum ! bytea_substr_no_len(PG_FUNCTION_ARGS) ! { ! return bytea_substr(fcinfo); } /* *************** *** 1422,1424 **** --- 1653,1834 ---- PG_RETURN_INT32(cmp); } + + /* + * replace_text + * replace all occurences of 'old_sub_str' in 'orig_str' + * with 'new_sub_str' to form 'new_str' + * + * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == '' + * otherwise returns 'new_str' + */ + Datum + replace_text(PG_FUNCTION_ARGS) + { + text *left_text; + text *right_text; + text *buf_text; + text *ret_text; + int curr_posn; + text *src_text = PG_GETARG_TEXT_P(0); + int src_text_len = TEXTLEN(src_text); + text *from_sub_text = PG_GETARG_TEXT_P(1); + int from_sub_text_len = TEXTLEN(from_sub_text); + text *to_sub_text = PG_GETARG_TEXT_P(2); + char *to_sub_str = PG_TEXT_GET_STR(to_sub_text); + StringInfo str = makeStringInfo(); + + if (src_text_len == 0 || from_sub_text_len == 0) + PG_RETURN_TEXT_P(src_text); + + buf_text = TEXTDUP(src_text); + curr_posn = TEXTPOS(buf_text, from_sub_text); + + while (curr_posn > 0) + { + left_text = LEFT(buf_text, from_sub_text); + right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len); + + appendStringInfo(str, PG_TEXT_GET_STR(left_text)); + appendStringInfo(str, to_sub_str); + + pfree(buf_text); + pfree(left_text); + buf_text = right_text; + curr_posn = TEXTPOS(buf_text, from_sub_text); + } + + appendStringInfo(str, PG_TEXT_GET_STR(buf_text)); + pfree(buf_text); + + ret_text = PG_STR_GET_TEXT(str->data); + pfree(str->data); + pfree(str); + + PG_RETURN_TEXT_P(ret_text); + } + + /* + * split_text + * parse input string + * return ord item (1 based) + * based on provided field separator + */ + Datum + split_text(PG_FUNCTION_ARGS) + { + text *inputstring = PG_GETARG_TEXT_P(0); + int inputstring_len = TEXTLEN(inputstring); + text *fldsep = PG_GETARG_TEXT_P(1); + int fldsep_len = TEXTLEN(fldsep); + int fldnum = PG_GETARG_INT32(2); + int start_posn = 0; + int end_posn = 0; + text *result_text; + + /* return empty string for empty input string */ + if (inputstring_len < 1) + PG_RETURN_TEXT_P(PG_STR_GET_TEXT("")); + + /* empty field separator */ + if (fldsep_len < 1) + { + if (fldnum == 1) /* first field - just return the input string */ + PG_RETURN_TEXT_P(inputstring); + else /* otherwise return an empty string */ + PG_RETURN_TEXT_P(PG_STR_GET_TEXT("")); + } + + /* field number is 1 based */ + if (fldnum < 1) + elog(ERROR, "field position must be > 0"); + + start_posn = text_position(PointerGetDatum(inputstring), + PointerGetDatum(fldsep), + fldnum - 1); + end_posn = text_position(PointerGetDatum(inputstring), + PointerGetDatum(fldsep), + fldnum); + + if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */ + { + if (fldnum == 1) /* first field - just return the input string */ + PG_RETURN_TEXT_P(inputstring); + else /* otherwise return an empty string */ + PG_RETURN_TEXT_P(PG_STR_GET_TEXT("")); + } + else if ((start_posn != 0) && (end_posn == 0)) + { + /* last field requested */ + result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true); + PG_RETURN_TEXT_P(result_text); + } + else if ((start_posn == 0) && (end_posn != 0)) + { + /* first field requested */ + result_text = LEFT(inputstring, fldsep); + PG_RETURN_TEXT_P(result_text); + } + else + { + /* prior to last field requested */ + result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false); + PG_RETURN_TEXT_P(result_text); + } + } + + #define HEXBASE 16 + /* + * Convert a int32 to a string containing a base 16 (hex) representation of + * the number. + */ + Datum + to_hex32(PG_FUNCTION_ARGS) + { + static char digits[] = "0123456789abcdef"; + char buf[32]; /* bigger than needed, but reasonable */ + char *ptr, + *end; + text *result_text; + int32 value = PG_GETARG_INT32(0); + + end = ptr = buf + sizeof(buf) - 1; + *ptr = '\0'; + + do + { + *--ptr = digits[value % HEXBASE]; + value /= HEXBASE; + } while (ptr > buf && value); + + result_text = PG_STR_GET_TEXT(ptr); + PG_RETURN_TEXT_P(result_text); + } + + /* + * Convert a int64 to a string containing a base 16 (hex) representation of + * the number. + */ + Datum + to_hex64(PG_FUNCTION_ARGS) + { + static char digits[] = "0123456789abcdef"; + char buf[32]; /* bigger than needed, but reasonable */ + char *ptr, + *end; + text *result_text; + int64 value = PG_GETARG_INT64(0); + + end = ptr = buf + sizeof(buf) - 1; + *ptr = '\0'; + + do + { + *--ptr = digits[value % HEXBASE]; + value /= HEXBASE; + } while (ptr > buf && value); + + result_text = PG_STR_GET_TEXT(ptr); + PG_RETURN_TEXT_P(result_text); + } + Index: src/include/catalog/pg_proc.h =================================================================== RCS file: /opt/src/cvs/pgsql-server/src/include/catalog/pg_proc.h,v retrieving revision 1.254 diff -c -r1.254 pg_proc.h *** src/include/catalog/pg_proc.h 15 Aug 2002 02:51:27 -0000 1.254 --- src/include/catalog/pg_proc.h 16 Aug 2002 18:53:13 -0000 *************** *** 2121,2127 **** DESCR("remove initial characters from string"); DATA(insert OID = 882 ( rtrim PGNSP PGUID 14 f f t f i 1 25 "25" "select rtrim($1, \' \')" - _null_ )); DESCR("remove trailing characters from string"); ! DATA(insert OID = 883 ( substr PGNSP PGUID 14 f f t f i 2 25 "25 23" "select substr($1, $2, -1)" - _null_ )); DESCR("return portion of string"); DATA(insert OID = 884 ( btrim PGNSP PGUID 12 f f t f i 2 25 "25 25" btrim - _null_ )); DESCR("trim both ends of string"); --- 2121,2127 ---- DESCR("remove initial characters from string"); DATA(insert OID = 882 ( rtrim PGNSP PGUID 14 f f t f i 1 25 "25" "select rtrim($1, \' \')" - _null_ )); DESCR("remove trailing characters from string"); ! DATA(insert OID = 883 ( substr PGNSP PGUID 12 f f t f i 2 25 "25 23" text_substr_no_len - _null_ )); DESCR("return portion of string"); DATA(insert OID = 884 ( btrim PGNSP PGUID 12 f f t f i 2 25 "25 25" btrim - _null_ )); DESCR("trim both ends of string"); *************** *** 2130,2137 **** DATA(insert OID = 936 ( substring PGNSP PGUID 12 f f t f i 3 25 "25 23 23" text_substr - _null_ )); DESCR("return portion of string"); ! DATA(insert OID = 937 ( substring PGNSP PGUID 14 f f t f i 2 25 "25 23" "select substring($1, $2, -1)" - _null_ )); DESCR("return portion of string"); /* for multi-byte support */ --- 2130,2145 ---- DATA(insert OID = 936 ( substring PGNSP PGUID 12 f f t f i 3 25 "25 23 23" text_substr - _null_ )); DESCR("return portion of string"); ! DATA(insert OID = 937 ( substring PGNSP PGUID 12 f f t f i 2 25 "25 23" text_substr_no_len - _null_ )); DESCR("return portion of string"); + DATA(insert OID = 2087 ( replace PGNSP PGUID 12 f f t f i 3 25 "25 25 25" replace_text - _null_ )); + DESCR("replace all occurrences of old_substr with new_substr in string"); + DATA(insert OID = 2088 ( split PGNSP PGUID 12 f f t f i 3 25 "25 25 23" split_text - _null_ )); + DESCR("split string by field_sep and return field_num"); + DATA(insert OID = 2089 ( to_hex PGNSP PGUID 12 f f t f i 1 25 "23" to_hex32 - _null_ )); + DESCR("convert int32 number to hex"); + DATA(insert OID = 2090 ( to_hex PGNSP PGUID 12 f f t f i 1 25 "20" to_hex64 - _null_ )); + DESCR("convert int64 number to hex"); /* for multi-byte support */ *************** *** 2778,2784 **** DESCR("concatenate"); DATA(insert OID = 2012 ( substring PGNSP PGUID 12 f f t f i 3 17 "17 23 23" bytea_substr - _null_ )); DESCR("return portion of string"); ! DATA(insert OID = 2013 ( substring PGNSP PGUID 14 f f t f i 2 17 "17 23" "select substring($1, $2, -1)" - _null_ )); DESCR("return portion of string"); DATA(insert OID = 2014 ( position PGNSP PGUID 12 f f t f i 2 23 "17 17" byteapos - _null_ )); DESCR("return position of substring"); --- 2786,2796 ---- DESCR("concatenate"); DATA(insert OID = 2012 ( substring PGNSP PGUID 12 f f t f i 3 17 "17 23 23" bytea_substr - _null_ )); DESCR("return portion of string"); ! DATA(insert OID = 2013 ( substring PGNSP PGUID 12 f f t f i 2 17 "17 23" bytea_substr_no_len - _null_ )); ! DESCR("return portion of string"); ! DATA(insert OID = 2085 ( substr PGNSP PGUID 12 f f t f i 3 17 "17 23 23" bytea_substr - _null_ )); ! DESCR("return portion of string"); ! DATA(insert OID = 2086 ( substr PGNSP PGUID 12 f f t f i 2 17 "17 23" bytea_substr_no_len - _null_ )); DESCR("return portion of string"); DATA(insert OID = 2014 ( position PGNSP PGUID 12 f f t f i 2 23 "17 17" byteapos - _null_ )); DESCR("return position of substring"); Index: src/include/utils/builtins.h =================================================================== RCS file: /opt/src/cvs/pgsql-server/src/include/utils/builtins.h,v retrieving revision 1.191 diff -c -r1.191 builtins.h *** src/include/utils/builtins.h 15 Aug 2002 02:51:27 -0000 1.191 --- src/include/utils/builtins.h 16 Aug 2002 18:53:13 -0000 *************** *** 447,458 **** --- 447,463 ---- extern Datum textoctetlen(PG_FUNCTION_ARGS); extern Datum textpos(PG_FUNCTION_ARGS); extern Datum text_substr(PG_FUNCTION_ARGS); + extern Datum text_substr_no_len(PG_FUNCTION_ARGS); extern Datum name_text(PG_FUNCTION_ARGS); extern Datum text_name(PG_FUNCTION_ARGS); extern int varstr_cmp(char *arg1, int len1, char *arg2, int len2); extern List *textToQualifiedNameList(text *textval, const char *caller); extern bool SplitIdentifierString(char *rawstring, char separator, List **namelist); + extern Datum replace_text(PG_FUNCTION_ARGS); + extern Datum split_text(PG_FUNCTION_ARGS); + extern Datum to_hex32(PG_FUNCTION_ARGS); + extern Datum to_hex64(PG_FUNCTION_ARGS); extern Datum unknownin(PG_FUNCTION_ARGS); extern Datum unknownout(PG_FUNCTION_ARGS); *************** *** 476,481 **** --- 481,487 ---- extern Datum byteacat(PG_FUNCTION_ARGS); extern Datum byteapos(PG_FUNCTION_ARGS); extern Datum bytea_substr(PG_FUNCTION_ARGS); + extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS); /* version.c */ extern Datum pgsql_version(PG_FUNCTION_ARGS); Index: src/test/regress/expected/strings.out =================================================================== RCS file: /opt/src/cvs/pgsql-server/src/test/regress/expected/strings.out,v retrieving revision 1.12 diff -c -r1.12 strings.out *** src/test/regress/expected/strings.out 11 Jun 2002 15:41:38 -0000 1.12 --- src/test/regress/expected/strings.out 16 Aug 2002 18:53:13 -0000 *************** *** 573,575 **** --- 573,738 ---- text and varchar (1 row) + -- + -- test substr with toasted text values + -- + CREATE TABLE toasttest(f1 text); + insert into toasttest values(repeat('1234567890',10000)); + insert into toasttest values(repeat('1234567890',10000)); + -- If the starting position is zero or less, then return from the start of the string + -- adjusting the length to be consistent with the "negative start" per SQL92. + SELECT substr(f1, -1, 5) from toasttest; + substr + -------- + 123 + 123 + (2 rows) + + -- If the length is less than zero, an ERROR is thrown. + SELECT substr(f1, 5, -1) from toasttest; + ERROR: negative substring length not allowed + -- If no third argument (length) is provided, the length to the end of the + -- string is assumed. + SELECT substr(f1, 99995) from toasttest; + substr + -------- + 567890 + 567890 + (2 rows) + + -- If start plus length is > string length, the result is truncated to + -- string length + SELECT substr(f1, 99995, 10) from toasttest; + substr + -------- + 567890 + 567890 + (2 rows) + + DROP TABLE toasttest; + -- + -- test substr with toasted bytea values + -- + CREATE TABLE toasttest(f1 bytea); + insert into toasttest values(decode(repeat('1234567890',10000),'escape')); + insert into toasttest values(decode(repeat('1234567890',10000),'escape')); + -- If the starting position is zero or less, then return from the start of the string + -- adjusting the length to be consistent with the "negative start" per SQL92. + SELECT substr(f1, -1, 5) from toasttest; + substr + -------- + 123 + 123 + (2 rows) + + -- If the length is less than zero, an ERROR is thrown. + SELECT substr(f1, 5, -1) from toasttest; + ERROR: negative substring length not allowed + -- If no third argument (length) is provided, the length to the end of the + -- string is assumed. + SELECT substr(f1, 99995) from toasttest; + substr + -------- + 567890 + 567890 + (2 rows) + + -- If start plus length is > string length, the result is truncated to + -- string length + SELECT substr(f1, 99995, 10) from toasttest; + substr + -------- + 567890 + 567890 + (2 rows) + + DROP TABLE toasttest; + -- + -- test length + -- + SELECT length('abcdef') AS "length_6"; + length_6 + ---------- + 6 + (1 row) + + -- + -- test strpos + -- + SELECT strpos('abcdef', 'cd') AS "pos_3"; + pos_3 + ------- + 3 + (1 row) + + SELECT strpos('abcdef', 'xy') AS "pos_0"; + pos_0 + ------- + 0 + (1 row) + + -- + -- test replace + -- + SELECT replace('abcdef', 'de', '45') AS "abc45f"; + abc45f + -------- + abc45f + (1 row) + + SELECT replace('yabadabadoo', 'ba', '123') AS "ya123da123doo"; + ya123da123doo + --------------- + ya123da123doo + (1 row) + + SELECT replace('yabadoo', 'bad', '') AS "yaoo"; + yaoo + ------ + yaoo + (1 row) + + -- + -- test split + -- + select split('joeuser@mydatabase','@',0) AS "an error"; + ERROR: field position must be > 0 + select split('joeuser@mydatabase','@',1) AS "joeuser"; + joeuser + --------- + joeuser + (1 row) + + select split('joeuser@mydatabase','@',2) AS "mydatabase"; + mydatabase + ------------ + mydatabase + (1 row) + + select split('joeuser@mydatabase','@',3) AS "empty string"; + empty string + -------------- + + (1 row) + + select split('@joeuser@mydatabase@','@',2) AS "joeuser"; + joeuser + --------- + joeuser + (1 row) + + -- + -- test to_hex + -- + select to_hex(256*256*256 - 1) AS "ffffff"; + ffffff + -------- + ffffff + (1 row) + + select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff"; + ffffffff + ---------- + ffffffff + (1 row) + Index: src/test/regress/sql/strings.sql =================================================================== RCS file: /opt/src/cvs/pgsql-server/src/test/regress/sql/strings.sql,v retrieving revision 1.8 diff -c -r1.8 strings.sql *** src/test/regress/sql/strings.sql 11 Jun 2002 15:41:38 -0000 1.8 --- src/test/regress/sql/strings.sql 16 Aug 2002 18:53:13 -0000 *************** *** 197,199 **** --- 197,292 ---- SELECT text 'text' || char(20) ' and characters' AS "Concat text to char"; SELECT text 'text' || varchar ' and varchar' AS "Concat text to varchar"; + + -- + -- test substr with toasted text values + -- + CREATE TABLE toasttest(f1 text); + + insert into toasttest values(repeat('1234567890',10000)); + insert into toasttest values(repeat('1234567890',10000)); + + -- If the starting position is zero or less, then return from the start of the string + -- adjusting the length to be consistent with the "negative start" per SQL92. + SELECT substr(f1, -1, 5) from toasttest; + + -- If the length is less than zero, an ERROR is thrown. + SELECT substr(f1, 5, -1) from toasttest; + + -- If no third argument (length) is provided, the length to the end of the + -- string is assumed. + SELECT substr(f1, 99995) from toasttest; + + -- If start plus length is > string length, the result is truncated to + -- string length + SELECT substr(f1, 99995, 10) from toasttest; + + DROP TABLE toasttest; + + -- + -- test substr with toasted bytea values + -- + CREATE TABLE toasttest(f1 bytea); + + insert into toasttest values(decode(repeat('1234567890',10000),'escape')); + insert into toasttest values(decode(repeat('1234567890',10000),'escape')); + + -- If the starting position is zero or less, then return from the start of the string + -- adjusting the length to be consistent with the "negative start" per SQL92. + SELECT substr(f1, -1, 5) from toasttest; + + -- If the length is less than zero, an ERROR is thrown. + SELECT substr(f1, 5, -1) from toasttest; + + -- If no third argument (length) is provided, the length to the end of the + -- string is assumed. + SELECT substr(f1, 99995) from toasttest; + + -- If start plus length is > string length, the result is truncated to + -- string length + SELECT substr(f1, 99995, 10) from toasttest; + + DROP TABLE toasttest; + + -- + -- test length + -- + + SELECT length('abcdef') AS "length_6"; + + -- + -- test strpos + -- + + SELECT strpos('abcdef', 'cd') AS "pos_3"; + + SELECT strpos('abcdef', 'xy') AS "pos_0"; + + -- + -- test replace + -- + SELECT replace('abcdef', 'de', '45') AS "abc45f"; + + SELECT replace('yabadabadoo', 'ba', '123') AS "ya123da123doo"; + + SELECT replace('yabadoo', 'bad', '') AS "yaoo"; + + -- + -- test split + -- + select split('joeuser@mydatabase','@',0) AS "an error"; + + select split('joeuser@mydatabase','@',1) AS "joeuser"; + + select split('joeuser@mydatabase','@',2) AS "mydatabase"; + + select split('joeuser@mydatabase','@',3) AS "empty string"; + + select split('@joeuser@mydatabase@','@',2) AS "joeuser"; + + -- + -- test to_hex + -- + select to_hex(256*256*256 - 1) AS "ffffff"; + + select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff";