import psycopg

DB_CONN_STRING = "dbname=badutf8 port=5435"

con = psycopg.connect(DB_CONN_STRING)
cur = con.cursor()

def setup_database():
    with psycopg.connect(DB_CONN_STRING) as conn:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS bademail (
                id SERIAL PRIMARY KEY,
                subject TEXT,
                sender TEXT,
                date TEXT,
                body TEXT
            );
        """)
        conn.commit()

query = "INSERT INTO bademail (subject, sender, date, body) VALUES (%s, %s, %s, %s)"

params = [
    'Re: [PATCH] Add RetrieveInstrumentation hook for CustomScan providers',
    'Siddharth Kothari <sidkot@google.com>',
    'Wed, 08 Apr 2026 10:57:16 +0530',
    "Hi everyone,\n\nI\xe2\x80\x99m just checking in to see if anyone has had a chance to look at this or\nif there\xe2\x80\x99s any further information I should provide to help with the\nreview. I have also added the patch to PG20-1 CF queue, the link is\nhttps://commitfest.postgresql.org/patch/6524/.\n\nThanks,\nSiddharth\n\nOn Wed, Feb 18, 2026 at 3:09\xe2\x80\xafPM Siddharth Kothari <sidkot@google.com> wrote:\n\n> Dear PostgreSQL Hackers,\n>\n> This email proposes a patch to enhance the CustomScan provider interface.\n> The patch file,\n> 0001-Add-RetrieveInstrumentationCustomScan-hook-for-Custo.patch, is\n> attached.\n>\n> *Problem:*\n>\n> CustomScan providers currently lack a standard method to aggregate\n> instrumentation data from parallel workers back to the leader process\n> before the Dynamic Shared Memory (DSM) segment is unlinked. This makes it\n> difficult to gather comprehensive performance metrics from parallelized\n> custom scans.\n>\n> *Solution:*\n>\n> This patch introduces a new optional hook,\n> RetrieveInstrumentationCustomScan, to the CustomExecMethods struct. This\n> hook allows custom scan providers to implement logic to collect and\n> consolidate instrumentation from shared memory or worker states during the\n> parallel query cleanup phase. This hook is invoked via the new\n> ExecCustomScanRetrieveInstrumentation function, called from\n> ExecParallelRetrieveInstrumentation for T_CustomScanState nodes. Since\n> the hook is optional (checked for NULL before calling), it maintains full\n> backward compatibility.\n>\n> *Testing & Compatibility:*\n>\n>    - The patch compiles and passes all core regression tests (make\n>    check-world) on my x86_64 instance.\n>    - The changes are not platform-specific.\n>    - Regression Tests: This patch provides a new *capability* for custom\n>    scan providers. Since the hook\'s functionality is only realized when\n>    implemented by an extension, specific tests would naturally reside within\n>    that extension rather than in the core regression suite.\n>\n> This patch does not directly address a specific item on the official TODO\n> list but enhances the extensibility framework.\n>\n> I believe this patch is complete and ready for review. I look forward to\n> any feedback and am happy to make revisions. I will also add this patch to\n> the next CommitFest.\n>\n> Thank you,\n>\n> Siddharth Kothari\n>)"
]

def insert_bad_data():
    with psycopg.connect(DB_CONN_STRING) as conn:
        with conn.cursor() as cur:
            cur.execute(query, params)
        conn.commit()

if __name__ == "__main__":
    setup_database()
    insert_bad_data()

"""
badutf8=# select ctid, id, substring(body from '^.{4}') from  bademail;
 ctid  │ id │ substring 
───────┼────┼───────────
 (0,1) │  1 │ Hi e
(1 row)

badutf8=# select ctid, id, substring(body, 1, 4) from  bademail;
ERROR:  22021: invalid byte sequence for encoding "UTF8": 0xc2
LOCATION:  report_invalid_encoding_int, mbutils.c:1847

badutf8=# select ctid, id, substring(normalize(body), 1, 4) from  bademail;
 ctid  │ id │ substring 
───────┼────┼───────────
 (0,1) │  1 │ Hi e
(1 row)

badutf8=# select ctid, id, length(body), length(normalize(body)), body=normalize(body) from  bademail;
 ctid  │ id │ length │ length │ ?column? 
───────┼────┼────────┼────────┼──────────
 (0,1) │  1 │   2314 │   2314 │ t
(1 row)
"""
