From: | Andrew Dunstan <andrew(at)dunslane(dot)net> |
---|---|
To: | Noah Misch <noah(at)leadboat(dot)com> |
Cc: | PostgreSQL-development <pgsql-hackers(at)postgresql(dot)org> |
Subject: | Re: XML with invalid chars |
Date: | 2011-04-28 17:51:23 |
Message-ID: | 4DB9A91B.7000401@dunslane.net |
Views: | Raw Message | Whole Thread | Download mbox | Resend email |
Thread: | |
Lists: | pgsql-hackers |
On 04/27/2011 05:30 PM, Noah Misch wrote:
>
>> I'm not sure what to do about the back branches and cases where data is
>> already in databases. This is fairly ugly. Suggestions welcome.
> We could provide a script in (or linked from) the release notes for testing the
> data in all your xml columns.
>
Here's a draft. We'd need to come up with slightly modified versions for
older versions of Postgres that don't sport array_agg() and unnest()
cheers
andrew
create function cleanup_xml_table
(schema_name text,table_name text, columns text[])
returns void
language plpgsql as
$func$
declare
cmd text;
cond text;
sep text := '';
alt text := '';
col text;
forbidden text := $$[\x1-\x8\xB\xC\xE-\x1F]$$;
begin
cmd := 'update ' || quote_ident(schema_name) || '.' ||
quote_ident(table_name) || ' set ';
for col in select unnest(columns)
loop
cmd := cmd || sep;
cond := cond || alt;
sep := ', ';
alt := ' or ';
cmd := cmd || quote_ident(col) || '=' ||
'regexp_replace(' || quote_ident(col) , || '::text, ' ||
quote_literal(forbiden) || ', $$$$, $$g$$)::xml';
cond := cond || quote_ident(col) || '::text ~ ' ||
quote_literal(forbidden);
end loop;
cmd := cmd || ' where ' || cond;
execute cmd;
return;
end;
$func$;
select cleanup_xml_table(table_schema,table_name, cols)
from
(select table_schema::text,
table_name::text,
array_agg(column_name::text) as cols
from information_schema.columns
where data_type = 'xml'
and is_updatable = 'yes'
group by table_schema, table_name) xmltabs;
From | Date | Subject | |
---|---|---|---|
Next Message | Andrew Dunstan | 2011-04-28 18:49:07 | Re: unknown conversion %m |
Previous Message | Robert Haas | 2011-04-28 16:45:54 | Re: SSI non-serializable UPDATE performance |