Re: XML with invalid chars

From: Andrew Dunstan <andrew(at)dunslane(dot)net>
To: Noah Misch <noah(at)leadboat(dot)com>
Cc: PostgreSQL-development <pgsql-hackers(at)postgresql(dot)org>
Subject: Re: XML with invalid chars
Date: 2011-04-28 17:51:23
Message-ID: 4DB9A91B.7000401@dunslane.net
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

On 04/27/2011 05:30 PM, Noah Misch wrote:
>
>> I'm not sure what to do about the back branches and cases where data is
>> already in databases. This is fairly ugly. Suggestions welcome.
> We could provide a script in (or linked from) the release notes for testing the
> data in all your xml columns.
>

Here's a draft. We'd need to come up with slightly modified versions for
older versions of Postgres that don't sport array_agg() and unnest()

cheers

andrew

create function cleanup_xml_table
(schema_name text,table_name text, columns text[])
returns void
language plpgsql as
$func$

declare
cmd text;
cond text;
sep text := '';
alt text := '';
col text;
forbidden text := $$[\x1-\x8\xB\xC\xE-\x1F]$$;
begin
cmd := 'update ' || quote_ident(schema_name) || '.' ||
quote_ident(table_name) || ' set ';
for col in select unnest(columns)
loop
cmd := cmd || sep;
cond := cond || alt;
sep := ', ';
alt := ' or ';
cmd := cmd || quote_ident(col) || '=' ||
'regexp_replace(' || quote_ident(col) , || '::text, ' ||
quote_literal(forbiden) || ', $$$$, $$g$$)::xml';
cond := cond || quote_ident(col) || '::text ~ ' ||
quote_literal(forbidden);
end loop;
cmd := cmd || ' where ' || cond;
execute cmd;
return;
end;

$func$;

select cleanup_xml_table(table_schema,table_name, cols)
from
(select table_schema::text,
table_name::text,
array_agg(column_name::text) as cols
from information_schema.columns
where data_type = 'xml'
and is_updatable = 'yes'
group by table_schema, table_name) xmltabs;

In response to

Browse pgsql-hackers by date

  From Date Subject
Next Message Andrew Dunstan 2011-04-28 18:49:07 Re: unknown conversion %m
Previous Message Robert Haas 2011-04-28 16:45:54 Re: SSI non-serializable UPDATE performance