From 37c1e8658ccdddbea91521d4885399c520f2a68b Mon Sep 17 00:00:00 2001 From: Koval Dmitry Date: Sun, 14 Jan 2024 21:02:55 +0300 Subject: [PATCH v1] Added support of XML_PARSE_HUGE flag for XML documents PostgreSQL uses functions of the libxml2 library without the XML_PARSE_HUGE flag. But in practice, the resulting 10 MB limit is too small. Using libxml2 library functions with the XML_PARSE_HUGE flag increases the maximum size allowed for a single text node from 10.000.000 to 1.000.000.000 (see XML_MAX_TEXT_LENGTH macro) which in most cases solves the problem with insufficient memory. --- contrib/xml2/xpath.c | 7 ++++--- contrib/xml2/xslt_proc.c | 10 ++++++---- src/backend/utils/adt/xml.c | 29 ++++++++++++++++++++++------- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/contrib/xml2/xpath.c b/contrib/xml2/xpath.c index a692dc6be8..ca34a03b64 100644 --- a/contrib/xml2/xpath.c +++ b/contrib/xml2/xpath.c @@ -380,8 +380,8 @@ pgxml_xpath(text *document, xmlChar *xpath, xpath_workspace *workspace) PG_TRY(); { - workspace->doctree = xmlParseMemory((char *) VARDATA_ANY(document), - docsize); + workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document), + docsize, NULL, NULL, XML_PARSE_HUGE); if (workspace->doctree != NULL) { workspace->ctxt = xmlXPathNewContext(workspace->doctree); @@ -624,7 +624,8 @@ xpath_table(PG_FUNCTION_ARGS) /* Parse the document */ if (xmldoc) - doctree = xmlParseMemory(xmldoc, strlen(xmldoc)); + doctree = xmlReadMemory(xmldoc, strlen(xmldoc), + NULL, NULL, XML_PARSE_HUGE); else /* treat NULL as not well-formed */ doctree = NULL; diff --git a/contrib/xml2/xslt_proc.c b/contrib/xml2/xslt_proc.c index 2189bca86f..9b2c69e6e7 100644 --- a/contrib/xml2/xslt_proc.c +++ b/contrib/xml2/xslt_proc.c @@ -85,16 +85,18 @@ xslt_process(PG_FUNCTION_ARGS) bool xslt_sec_prefs_error; /* Parse document */ - doctree = xmlParseMemory((char *) VARDATA_ANY(doct), - VARSIZE_ANY_EXHDR(doct)); + doctree = xmlReadMemory((char *) VARDATA_ANY(doct), + 
VARSIZE_ANY_EXHDR(doct), NULL, NULL, + XML_PARSE_HUGE); if (doctree == NULL) xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, "error parsing XML document"); /* Same for stylesheet */ - ssdoc = xmlParseMemory((char *) VARDATA_ANY(ssheet), - VARSIZE_ANY_EXHDR(ssheet)); + ssdoc = xmlReadMemory((char *) VARDATA_ANY(ssheet), + VARSIZE_ANY_EXHDR(ssheet), NULL, NULL, + XML_PARSE_HUGE); if (ssdoc == NULL) xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 191dd2d1e2..d6f7d95c3c 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -1795,7 +1795,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, doc = xmlCtxtReadDoc(ctxt, utf8string, NULL, "UTF-8", - XML_PARSE_NOENT | XML_PARSE_DTDATTR + XML_PARSE_NOENT | XML_PARSE_DTDATTR | XML_PARSE_HUGE | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS)); if (doc == NULL || xmlerrcxt->err_occurred) { @@ -1828,10 +1828,25 @@ xml_parse(text *data, XmlOptionType xmloption_arg, /* allow empty content */ if (*(utf8string + count)) { - res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, - utf8string + count, - parsed_nodes); - if (res_code != 0 || xmlerrcxt->err_occurred) + const char *data; + xmlNodePtr root; + xmlParserErrors xml_error; + xmlNodePtr lst; + + data = (const char *) (utf8string + count); + + /* Create fake root node. */ + root = xmlNewNode(NULL, (const xmlChar *) "content-root"); + if (root == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xml node"); + xmlDocSetRootElement(doc, root); + + /* Try to parse string with using root node context. */ + xml_error = xmlParseInNodeContext(root, data, strlen(data), + XML_PARSE_HUGE, + parsed_nodes ? 
parsed_nodes : &lst); + if (xml_error != XML_ERR_OK || xmlerrcxt->err_occurred) { xml_errsave(escontext, xmlerrcxt, ERRCODE_INVALID_XML_CONTENT, @@ -4344,7 +4359,7 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate parser context"); doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len, - len - xmldecl_len, NULL, NULL, 0); + len - xmldecl_len, NULL, NULL, XML_PARSE_HUGE); if (doc == NULL || xmlerrcxt->err_occurred) xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, "could not parse XML document"); @@ -4675,7 +4690,7 @@ XmlTableSetDocument(TableFuncScanState *state, Datum value) PG_TRY(); { - doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0); + doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, XML_PARSE_HUGE); if (doc == NULL || xtCxt->xmlerrcxt->err_occurred) xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, "could not parse XML document"); -- 2.40.1.windows.1