From 86d0bcfe2bb6752c3cf773d28eb7c201cb41bdf0 Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <andrew@dunslane.net>
Date: Thu, 22 Feb 2024 03:04:41 -0500
Subject: [PATCH v8 5/5] fixes for non-null terminated inputs for incremental
 json parsing

---
 src/bin/pg_combinebackup/load_manifest.c      |  5 ++-
 src/bin/pg_verifybackup/pg_verifybackup.c     |  5 ++-
 src/common/jsonapi.c                          | 43 ++++++++++++++-----
 .../test_json_parser_incremental.c            |  5 ++-
 4 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/src/bin/pg_combinebackup/load_manifest.c b/src/bin/pg_combinebackup/load_manifest.c
index 982be78e28..ae73d01190 100644
--- a/src/bin/pg_combinebackup/load_manifest.c
+++ b/src/bin/pg_combinebackup/load_manifest.c
@@ -172,7 +172,7 @@ load_backup_manifest(char *backup_directory)
 
 		inc_state = json_parse_manifest_incremental_init(&context);
 
-		buffer = pg_malloc(chunk_size + 1);
+		buffer = pg_malloc(chunk_size + 64);
 
 		while (bytes_left > 0)
 		{
@@ -188,7 +188,6 @@ load_backup_manifest(char *backup_directory)
 			else if (bytes_left < 2 * chunk_size)
 				bytes_to_read = bytes_left / 2;
 			rc = read(fd, buffer, bytes_to_read);
-			buffer[rc] = '\0';	/* useful for writing log traces */
 			if (rc != bytes_to_read)
 			{
 				if (rc < 0)
@@ -199,6 +198,8 @@ load_backup_manifest(char *backup_directory)
 							 (long long int)(statbuf.st_size + rc - bytes_left),
 							 (long long int) statbuf.st_size);
 			}
+			/* trailing junk exercises non-NUL-terminated chunks */
+			strcpy(buffer + rc,  "1+23 trailing junk");
 			bytes_left -= rc;
 			json_parse_manifest_incremental_chunk(
 												  inc_state, buffer, rc, bytes_left == 0);
diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c
index 02b160f9fc..6eaa376bf0 100644
--- a/src/bin/pg_verifybackup/pg_verifybackup.c
+++ b/src/bin/pg_verifybackup/pg_verifybackup.c
@@ -453,7 +453,7 @@ parse_manifest_file(char *manifest_path, manifest_files_hash **ht_p,
 
 		inc_state = json_parse_manifest_incremental_init(&context);
 
-		buffer = pg_malloc(chunk_size + 1);
+		buffer = pg_malloc(chunk_size + 64);
 
 		while (bytes_left > 0)
 		{
@@ -469,7 +469,6 @@ parse_manifest_file(char *manifest_path, manifest_files_hash **ht_p,
 			else if (bytes_left < 2 * chunk_size)
 				bytes_to_read = bytes_left / 2;
 			rc = read(fd, buffer, bytes_to_read);
-			buffer[rc] = '\0';	/* useful for writing log traces */
 			if (rc != bytes_to_read)
 			{
 				if (rc < 0)
@@ -480,6 +479,8 @@ parse_manifest_file(char *manifest_path, manifest_files_hash **ht_p,
 							 (long long int)(statbuf.st_size + rc - bytes_left),
 							 (long long int) statbuf.st_size);
 			}
+			/* trailing junk exercises non-NUL-terminated chunks */
+			strcpy(buffer + rc, "1+23 trailing junk");
 			bytes_left -= rc;
 			json_parse_manifest_incremental_chunk(
 												  inc_state, buffer, rc, bytes_left == 0);
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index 25fca8851d..11a22faa18 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -1317,14 +1317,37 @@ json_lex(JsonLexContext *lex)
 			if (c == '-' || (c >= '0' && c <= '9'))
 			{
 				/* for numbers look for possible numeric continuations */
-				size_t		nums = strspn(lex->input, "+-.eE0123456789");
 
-				for (int i = 0; i < nums; i++)
+				bool numend = false;
+
+				for (int i = 0; i < lex->input_length && !numend; i++)
 				{
 					char		cc = lex->input[i];
 
-					appendStringInfoCharMacro(ptok, cc);
-					added++;
+					switch (cc)
+					{
+						case '+':
+						case '-':
+						case '.':	/* decimal point continues a number too */
+						case 'e':
+						case 'E':
+						case '0':
+						case '1':
+						case '2':
+						case '3':
+						case '4':
+						case '5':
+						case '6':
+						case '7':
+						case '8':
+						case '9':
+							appendStringInfoCharMacro(ptok, cc);	/* extend partial token */
+							added++;
+							break;
+						default:	/* any other byte ends the numeric run */
+							numend = true;
+							break;
+					}
 				}
 			}
 			/* add any remaining alpha_numeric chars */
@@ -1496,8 +1519,8 @@ json_lex(JsonLexContext *lex)
 					if (lex->incremental && !lex->inc_state->is_last_chunk &&
 						p == lex->input + lex->input_length)
 					{
-						appendStringInfoString(
-											   &(lex->inc_state->partial_token), s);
+						appendBinaryStringInfo(
+							&(lex->inc_state->partial_token), s, end  - s);
 						return JSON_INCOMPLETE;
 					}
 
@@ -1554,8 +1577,8 @@ json_lex_string(JsonLexContext *lex)
 	do { \
 		if (lex->incremental && !lex->inc_state->is_last_chunk) \
 		{ \
-			appendStringInfoString(&lex->inc_state->partial_token, \
-								   lex->token_start); \
+			appendBinaryStringInfo(&lex->inc_state->partial_token, \
+								   lex->token_start, end - lex->token_start); \
 			return JSON_INCOMPLETE; \
 		} \
 		lex->token_terminator = s; \
@@ -1893,8 +1916,8 @@ json_lex_number(JsonLexContext *lex, char *s,
 	if (lex->incremental && !lex->inc_state->is_last_chunk &&
 		len >= lex->input_length)
 	{
-		appendStringInfoString(&lex->inc_state->partial_token,
-							   lex->token_start);
+		appendBinaryStringInfo(&lex->inc_state->partial_token,
+							   lex->token_start, s - lex->token_start);
 		return JSON_INCOMPLETE;
 	}
 	else if (num_err != NULL)
diff --git a/src/test/modules/test_json_parser/test_json_parser_incremental.c b/src/test/modules/test_json_parser/test_json_parser_incremental.c
index edb51ef403..dee5c6f7d1 100644
--- a/src/test/modules/test_json_parser/test_json_parser_incremental.c
+++ b/src/test/modules/test_json_parser/test_json_parser_incremental.c
@@ -42,10 +42,11 @@ main(int argc, char **argv)
 	while ((n_read = fread(buff, 1, 60, json_file)) > 0)
 	{
 		appendBinaryStringInfo(&json, buff, n_read);
+		appendStringInfoString(&json, "1+23 trailing junk");
 		if (!feof(json_file))
 		{
 			result = pg_parse_json_incremental(&lex, &nullSemAction,
-											   json.data, json.len,
+											   json.data, n_read,
 											   false);
 			if (result != JSON_INCOMPLETE)
 			{
@@ -59,7 +60,7 @@ main(int argc, char **argv)
 		else
 		{
 			result = pg_parse_json_incremental(&lex, &nullSemAction,
-											   json.data, json.len,
+											   json.data, n_read,
 											   true);
 			if (result != JSON_SUCCESS)
 			{
-- 
2.34.1

