From 8a0394019be3135fc66f386f58a759b402dbb45f Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Wed, 20 Jan 2021 16:53:30 +0100
Subject: [PATCH v49 3/3] Replace assuming a composite object on a scalar

For jsonb subscripting assignment it could happen that the provided path
assumes an object or an array at some level, but the source jsonb has a
scalar value there. Originally the update operation will be skipped and
no message will be shown that nothing happened. Return an error to
indicate such situations.
---
 doc/src/sgml/json.sgml              | 39 +++++++++++++++++++++++------
 src/backend/utils/adt/jsonfuncs.c   | 27 ++++++++++++++++++++
 src/test/regress/expected/jsonb.out | 27 ++++++++++++++++++++
 src/test/regress/sql/jsonb.sql      | 17 +++++++++++++
 4 files changed, 102 insertions(+), 8 deletions(-)

diff --git a/doc/src/sgml/json.sgml b/doc/src/sgml/json.sgml
index 07bd19f974..e16dd6973d 100644
--- a/doc/src/sgml/json.sgml
+++ b/doc/src/sgml/json.sgml
@@ -614,8 +614,24 @@ SELECT jdoc-&gt;'guid', jdoc-&gt;'name' FROM api WHERE jdoc @&gt; '{"tags": ["qu
    The result of a subscripting expression is always of the jsonb data type.
   </para>
 
+  <para>
+   <command>UPDATE</command> statements may use subscripting in the
+   <literal>SET</literal> clause to modify <type>jsonb</type> values. Subscript
+   paths must be traversible for all affected values insofar as they exist. For
+   instance, the path <literal>val['a']['b']['c']</literal> can be traversed all
+   the way to <literal>c</literal> if every <literal>val</literal>,
+   <literal>val['a']</literal>, and <literal>val['a']['b']</literal> is an
+   object. If any <literal>val['a']</literal> or <literal>val['a']['b']</literal>
+   is not defined, it will be created as an empty object and filled as
+   necessary. However, if any <literal>val</literal> itself or one of the
+   intermediary values is defined as a non-object such as a string, number, or
+   <literal>jsonb</literal> <literal>null</literal>, traversal cannot proceed so
+   an error is raised and the transaction aborted.
+  </para>
+
   <para>
    An example of subscripting syntax:
+
 <programlisting>
 
 -- Extract object value by key
@@ -631,6 +647,10 @@ SELECT ('[1, "2", null]'::jsonb)[1];
 -- value must be of the jsonb type as well
 UPDATE table_name SET jsonb_field['key'] = '1';
 
+-- This will raise an error if any record's jsonb_field['a']['b'] is something
+-- other than an object. For example, the value {"a": 1} has no 'b' key.
+UPDATE table_name SET jsonb_field['a']['b']['c'] = '1';
+
 -- Filter records using a WHERE clause with subscripting. Since the result of
 -- subscripting is jsonb, the value we compare it against must also be jsonb.
 -- The double quotes make "value" also a valid jsonb string.
@@ -639,8 +659,9 @@ SELECT * FROM table_name WHERE jsonb_field['key'] = '"value"';
 
    <type>jsonb</type> assignment via subscripting handles a few edge cases
    differently from <literal>jsonb_set</literal>. When a source <type>jsonb</type>
-   is <literal>NULL</literal>, assignment via subscripting will proceed as if
-   it was an empty JSON object:
+   value is <literal>NULL</literal>, assignment via subscripting will proceed
+   as if it was an empty JSON value of the type (object or array) implied by the
+   subscript key:
 
 <programlisting>
 -- Where jsonb_field was NULL, it is now {"a": 1}
@@ -661,17 +682,19 @@ UPDATE table_name SET jsonb_field[2] = '2';
 </programlisting>
 
    A <type>jsonb</type> value will accept assignments to nonexistent subscript
-   paths as long as the last existing path key is an object or an array. Since
-   the final subscript is not traversed, it may be an object key. Nested arrays
-   will be created and <literal>NULL</literal>-padded according to the path until
-   the value can be placed appropriately.
+   paths as long as the last existing element to be traversed is an object or
+   array, as implied by the corresponding subscript (the element indicated by
+   the last subscript in the path is not traversed and may be anything). Nested
+   array and object structures will be created, and in the former case
+   <literal>null</literal>-padded, as specified by the subscript path until the
+   assigned value can be placed.
 
 <programlisting>
 -- Where jsonb_field was {}, it is now {'a': [{'b': 1}]}
 UPDATE table_name SET jsonb_field['a'][0]['b'] = '1';
 
--- Where jsonb_field was [], it is now [{'a': 1}]
-UPDATE table_name SET jsonb_field[0]['a'] = '1';
+-- Where jsonb_field was [], it is now [null, {'a': 1}]
+UPDATE table_name SET jsonb_field[1]['a'] = '1';
 </programlisting>
 
   </para>
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index f14f6c3191..9138b7950e 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -4926,6 +4926,20 @@ setPath(JsonbIterator **it, Datum *path_elems,
 	switch (r)
 	{
 		case WJB_BEGIN_ARRAY:
+			/*
+			 * If instructed complain about attempts to replace whithin a raw
+			 * scalar value. This happens even when current level is equal to
+			 * path_len, because the last path key should also correspond to an
+			 * object or an array, not raw scalar.
+			 */
+			if ((op_type & JB_PATH_FILL_GAPS) && (level <= path_len - 1) &&
+				v.val.array.rawScalar)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("cannot replace existing key"),
+						 errdetail("The path assumes key is a composite object, "
+								   "but it is a scalar value.")));
+
 			(void) pushJsonbValue(st, r, NULL);
 			setPathArray(it, path_elems, path_nulls, path_len, st, level,
 						 newval, v.val.array.nElems, op_type);
@@ -4943,6 +4957,19 @@ setPath(JsonbIterator **it, Datum *path_elems,
 			break;
 		case WJB_ELEM:
 		case WJB_VALUE:
+			/*
+			 * If instructed complain about attempts to replace whithin a
+			 * scalar value. This happens even when current level is equal to
+			 * path_len, because the last path key should also correspond to an
+			 * object or an array, not an element or value.
+			 */
+			if ((op_type & JB_PATH_FILL_GAPS) && (level <= path_len - 1))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("cannot replace existing key"),
+						 errdetail("The path assumes key is a composite object, "
+								   "but it is a scalar value.")));
+
 			res = pushJsonbValue(st, r, &v);
 			break;
 		default:
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index 5b5510c4fd..cf0ce0e44c 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -5134,6 +5134,33 @@ select * from test_jsonb_subscript;
   1 | {"a": [null, {"c": [null, null, 1]}]}
 (1 row)
 
+-- trying replace assuming a composite object, but it's an element or a value
+delete from test_jsonb_subscript;
+insert into test_jsonb_subscript values (1, '{"a": 1}');
+update test_jsonb_subscript set test_json['a']['b'] = '1';
+ERROR:  cannot replace existing key
+DETAIL:  The path assumes key is a composite object, but it is a scalar value.
+update test_jsonb_subscript set test_json['a']['b']['c'] = '1';
+ERROR:  cannot replace existing key
+DETAIL:  The path assumes key is a composite object, but it is a scalar value.
+update test_jsonb_subscript set test_json['a'][0] = '1';
+ERROR:  cannot replace existing key
+DETAIL:  The path assumes key is a composite object, but it is a scalar value.
+update test_jsonb_subscript set test_json['a'][0]['c'] = '1';
+ERROR:  cannot replace existing key
+DETAIL:  The path assumes key is a composite object, but it is a scalar value.
+update test_jsonb_subscript set test_json['a'][0][0] = '1';
+ERROR:  cannot replace existing key
+DETAIL:  The path assumes key is a composite object, but it is a scalar value.
+-- trying replace assuming a composite object, but it's a raw scalar
+delete from test_jsonb_subscript;
+insert into test_jsonb_subscript values (1, 'null');
+update test_jsonb_subscript set test_json[0] = '1';
+ERROR:  cannot replace existing key
+DETAIL:  The path assumes key is a composite object, but it is a scalar value.
+update test_jsonb_subscript set test_json[0][0] = '1';
+ERROR:  cannot replace existing key
+DETAIL:  The path assumes key is a composite object, but it is a scalar value.
 -- jsonb to tsvector
 select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
                                 to_tsvector                                
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index 0320db0ea4..1a9d21741f 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -1371,6 +1371,23 @@ insert into test_jsonb_subscript values (1, '{"a": []}');
 update test_jsonb_subscript set test_json['a'][1]['c'][2] = '1';
 select * from test_jsonb_subscript;
 
+-- trying replace assuming a composite object, but it's an element or a value
+
+delete from test_jsonb_subscript;
+insert into test_jsonb_subscript values (1, '{"a": 1}');
+update test_jsonb_subscript set test_json['a']['b'] = '1';
+update test_jsonb_subscript set test_json['a']['b']['c'] = '1';
+update test_jsonb_subscript set test_json['a'][0] = '1';
+update test_jsonb_subscript set test_json['a'][0]['c'] = '1';
+update test_jsonb_subscript set test_json['a'][0][0] = '1';
+
+-- trying replace assuming a composite object, but it's a raw scalar
+
+delete from test_jsonb_subscript;
+insert into test_jsonb_subscript values (1, 'null');
+update test_jsonb_subscript set test_json[0] = '1';
+update test_jsonb_subscript set test_json[0][0] = '1';
+
 -- jsonb to tsvector
 select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
 
-- 
2.21.0

