From: | Chao Li <li(dot)evan(dot)chao(at)gmail(dot)com> |
---|---|
To: | Álvaro Herrera <alvherre(at)kurilemu(dot)de> |
Cc: | "David E(dot) Wheeler" <david(at)justatheory(dot)com>, Florents Tselai <florents(dot)tselai(at)gmail(dot)com>, Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>, Peter Eisentraut <peter(at)eisentraut(dot)org>, Robert Haas <robertmhaas(at)gmail(dot)com>, Alexander Korotkov <aekorotkov(at)gmail(dot)com>, pgsql-hackers <pgsql-hackers(at)lists(dot)postgresql(dot)org>, Andrew Dunstan <andrew(at)dunslane(dot)net> |
Subject: | Re: PATCH: jsonpath string methods: lower, upper, initcap, l/r/btrim, replace, split_part |
Date: | 2025-10-23 02:43:13 |
Message-ID: | 87530674-E6B6-4C97-A704-78C7E07CF01F@gmail.com |
Views: | Whole Thread | Raw Message | Download mbox | Resend email |
Thread: | |
Lists: | pgsql-hackers |
Hi Alvora,
I have reviewed and tested the patch, and got a few comments.
> On Oct 21, 2025, at 16:05, Álvaro Herrera <alvherre(at)kurilemu(dot)de> wrote:
>
> This was lacking rebase after the func.sgml changes. Here it is again.
> I have not reviewed it.
>
> --
> Álvaro Herrera 48°01'N 7°57'E — https://www.EnterpriseDB.com/
> <v13-0001-Rename-jsonpath-method-arg-tokens.patch><v13-0002-Add-additional-jsonpath-string-methods.patch>
1 - jsonpath.c
```
case jpiStringFunc:
return "string";
+ case jpiStrReplace:
+ return "replace";
+ case jpiStrLower:
+ return "lower";
+ case jpiStrUpper:
+ return "upper";
case jpiTime:
return "time";
case jpiTimeTz:
@@ -914,6 +982,16 @@ jspOperationName(JsonPathItemType type)
return "timestamp";
case jpiTimestampTz:
return "timestamp_tz";
+ case jpiStrLtrim:
+ return "ltrim";
+ case jpiStrRtrim:
+ return "rtrim";
+ case jpiStrBtrim:
+ return "btrim";
+ case jpiStrInitcap:
+ return "initcap";
+ case jpiStrSplitPart:
+ return "split_part";
default:
elog(ERROR, "unrecognized jsonpath item type: %d", type);
return NULL;
```
I wonder if there is some consideration for the order? Feels that jpiSttLtrim and the following jpiStrXXX should be placed above jpiTimeXXX.
2 - jsonpath.c
```
+ if (v->content.arg)
+ {
+ jspGetArg(v, &elem);
+ printJsonPathItem(buf, &elem, false, false);
+ }
```
As there is jspGetArg(), for v->content.arg, does it make sense to add a macro or inline function of jspHasArg(v)?
3 - jsonpath.c
```
+ case jpiStrLtrim:
+ return "ltrim";
+ case jpiStrRtrim:
+ return "rtrim";
+ case jpiStrBtrim:
+ return "btrim";
```
I know “b” in “btrim” stands for “both”, just curious why trim both side function is named “btrim()”? In most of programming languages I am aware of, trim() is the choice.
4 - jsonpath_exec.c
```
default:
;
/* cant' happen */
```
Typo: cant’ -> can’t
5 - jsonpath_exec.c
```
+ /* Create the appropriate jb value to return */
+ switch (jsp->type)
+ {
+ /* Cases for functions that return text */
+ case jpiStrLower:
+ case jpiStrUpper:
+ case jpiStrReplace:
+ case jpiStrLtrim:
+ case jpiStrRtrim:
+ case jpiStrBtrim:
+ case jpiStrInitcap:
+ case jpiStrSplitPart:
+ jb->type = jbvString;
+ jb->val.string.val = resStr;
+ jb->val.string.len = strlen(jb->val.string.val);
+ default:
+ ;
+ /* cant' happen */
+ }
```
As “default” clause has a comment “can’t happen”, I believe “break” is missing in the case clause.
Also, do we want to add an assert in default to report a message in case it happens?
6 - jsonpath_exec.c
```
+ resStr = TextDatumGetCString(DirectFunctionCall3Coll(replace_text,
+ C_COLLATION_OID,
+ CStringGetTextDatum(tmp),
+ CStringGetTextDatum(from_str),
+ CStringGetTextDatum(to_str)));
```
For trim functions, DEFAULT_COLLATION_OID used. Why C_COLLATION_OID is used for replace and split_part? I don’t see anything mentioned in your changes to the doc (func-json.sgml).
7 - jsonpath_exec.c
```
+ if (!(jb = getScalar(jb, jbvString)))
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_SQL_JSON_DATETIME_FUNCTION),
+ errmsg("jsonpath item method .%s() can only be applied to a string",
+ jspOperationName(jsp->type)))));
```
ERRCODE_INVALID_ARGUMENT_FOR_SQL_JSON_DATETIME_FUNCTION seems wrong, this is a string function, not a date time function.
8 - jsonpath_exec.c
```
+ switch (jsp->type)
+ {
+ case jpiStrLtrim:
+ case jpiStrRtrim:
+ case jpiStrBtrim:
+ {
+ char *characters_str;
+ int characters_len;
+ PGFunction func = NULL;
+
+ if (jsp->content.arg)
+ {
+ switch (jsp->type)
+ {
+ case jpiStrLtrim:
+ func = ltrim;
+ break;
+ case jpiStrRtrim:
+ func = rtrim;
+ break;
+ case jpiStrBtrim:
+ func = btrim;
+ break;
+ default:;
+ }
+ jspGetArg(jsp, &elem);
+ if (elem.type != jpiString)
+ elog(ERROR, "invalid jsonpath item type for .%s() argument", jspOperationName(jsp->type));
+
+ characters_str = jspGetString(&elem, &characters_len);
+ resStr = TextDatumGetCString(DirectFunctionCall2Coll(func,
+ DEFAULT_COLLATION_OID, str,
+ CStringGetTextDatum(characters_str)));
+ break;
+ }
+
+ switch (jsp->type)
+ {
+ case jpiStrLtrim:
+ func = ltrim1;
+ break;
+ case jpiStrRtrim:
+ func = rtrim1;
+ break;
+ case jpiStrBtrim:
+ func = btrim1;
+ break;
+ default:;
+ }
+ resStr = TextDatumGetCString(DirectFunctionCall1Coll(func,
+ DEFAULT_COLLATION_OID, str));
+ break;
+ }
```
The two nested “switch (jsp->type)” are quit redundant. We can pull up the second one, and simplify the first one, something like:
```
Switch (jsp->)
{
Case:
..
}
If (jsp->content.arg)
{
jspGetArg(jsp, &elem);
...
}
```
9 - jsonpath_exec.c
```
+ if (elem.type != jpiString)
+ elog(ERROR, "invalid jsonpath item type for .replace() from");
+
+ from_str = jspGetString(&elem, &from_len);
+
+ jspGetRightArg(jsp, &elem);
+ if (elem.type != jpiString)
+ elog(ERROR, "invalid jsonpath item type for .replace() to");
```
In these two elog(), do we want to log the invalid type? As I see in the “default” clause, jsp->type is logged:
```
+ default:
+ elog(ERROR, "unsupported jsonpath item type: %d", jsp->type);
```
Best regards,
--
Chao Li (Evan)
HighGo Software Co., Ltd.
https://www.highgo.com/
From | Date | Subject | |
---|---|---|---|
Next Message | Richard Guo | 2025-10-23 02:44:54 | Re: Issue with query_is_distinct_for() and grouping sets |
Previous Message | Tom Lane | 2025-10-23 02:40:10 | Re: Identifying Schema-Qualified Sequence References in Column Defaults |