create schema hashagg_spill;
set search_path to hashagg_spill;

-- start_ignore
CREATE EXTENSION plpythonu;
-- end_ignore

-- Create a function which checks how many batches there are
-- output example: "Memory Usage: 4096kB  Batches: 68  Disk Usage:8625kB"
create or replace function hashagg_spill.num_batches(explain_query text)
returns setof int as
$$
import re
rv = plpy.execute(explain_query)
search_text = 'batches'
result = []
for i in range(len(rv)):
    cur_line = rv[i]['QUERY PLAN']
    if search_text.lower() in cur_line.lower():
        p = re.compile('.*Memory Usage: (\d+).* Batches: (\d+)  Disk.*')
        m = p.match(cur_line)
        batches = int(m.group(2))
        result.append(batches)
return result
$$
language plpythonu;

create table testhagg (i1 int, i2 int, i3 int, i4 int);
insert into testhagg select i,i,i,i from generate_series(1, 30000)i;

set work_mem="1800";

select * from (select max(i1) from testhagg group by i2) foo order by 1 limit 10;
select num_batches > 0 from hashagg_spill.num_batches('explain (analyze, verbose) select max(i1) from testhagg group by i2;') num_batches;
select num_batches > 3 from hashagg_spill.num_batches('explain (analyze, verbose) select max(i1) from testhagg group by i2 limit 90000;') num_batches;

reset all;
set search_path to hashagg_spill;

-- Test agg spilling scenarios
create table aggspill (i int, j int, t text);
insert into aggspill select i, i*2, i::text from generate_series(1, 10000) i;
insert into aggspill select i, i*2, i::text from generate_series(1, 100000) i;
insert into aggspill select i, i*2, i::text from generate_series(1, 1000000) i;

-- No spill with large statement memory
set work_mem = '125MB';
select * from hashagg_spill.num_batches('explain (analyze, verbose)
select count(*) from (select i, count(*) from aggspill group by i,j having count(*) = 1) g;');

-- Reduce the statement memory to induce spilling
set work_mem = '10MB';
select num_batches > 32 from hashagg_spill.num_batches('explain (analyze, verbose)
select count(*) from (select i, count(*) from aggspill group by i,j having count(*) = 2) g;') num_batches;

-- Reduce the statement memory, more batches
set work_mem = '5MB';

select num_batches > 64 from hashagg_spill.num_batches('explain (analyze, verbose)
select count(*) from (select i, count(*) from aggspill group by i,j having count(*) = 3) g;') num_batches;

-- Check spilling to a temp tablespace
SET work_mem='1000kB';

CREATE TABLE hashagg_spill(col1 numeric, col2 int);
INSERT INTO hashagg_spill SELECT id, 1 FROM generate_series(1,20000) id;
ANALYZE hashagg_spill;

CREATE TABLE spill_temptblspace (a numeric);
SET temp_tablespaces=pg_default;
INSERT INTO spill_temptblspace SELECT avg(col2) col2 FROM hashagg_spill GROUP BY col1 HAVING(sum(col1)) < 0;
RESET temp_tablespaces;
RESET work_mem;

drop schema hashagg_spill cascade;