#!/usr/bin/env python

import sys
from math import log10

def arg(n, default=None, func=lambda x: x):
    """Return command-line argument n passed through func.

    default is passed through func instead when sys.argv has no entry at
    index n, and also when that entry is an empty string.
    """
    if len(sys.argv) > n and sys.argv[n]:
        raw = sys.argv[n]
    else:
        # Missing or empty argument: fall back to the default.
        raw = default
    return func(raw)

if len(sys.argv) < 2:
    # A bare invocation prints the usage text and exits with status 1.
    # The text is written to stderr because stdout carries the generated
    # SQL and is typically piped into a database client.
    sys.stderr.write("""
Usage:
    %s ntabs nrows npadcols padlen scalearg batchsize
""" % sys.argv[0] + '\n')
    sys.exit(1)

# Positional parameters; each falls back to the default shown when the
# argument is absent or empty.
ntabs = arg(1, '40000', int)      # number of tables to create (default 40k)
nrows = arg(2, '1000', int)       # base row count per table (default 1k)
npadcols = arg(3, '5', int)       # number of padding columns (default 5)
padlen = arg(4, '20', int)        # characters per padding value (default 20)
scalearg = arg(5, '10', int)      # rowcount scaling, here 10 per decade, i.e. every
                                  # 10th table gets 10x rows, every 100th gets 100x rows
batchsize = arg(6, '1', int)      # tables per transaction
if batchsize < 1:
    # The emit loop computes "tno % batchsize"; a zero batch size would
    # raise ZeroDivisionError after part of the SQL has already been
    # written.  Reject it before any output is produced.
    sys.exit('%s: batchsize must be at least 1' % sys.argv[0])

# Table name template, e.g. 't_%05d'.  The width is the number of decimal
# digits in ntabs, taken from its string form (the same way padname is
# built below) so that ntabs <= 0 does not hit log10's math domain error.
tabname = 't_%%0%dd' % (len(str(ntabs)),)
# Column name template for the padding columns, e.g. 'pad_%01d'.
padname = 'pad_%%0%dd' % (len(str(npadcols)),)
# Padding column i is filled with padlen copies of one character: '0' for
# column 0, '1' for column 1, and so on.  The offset wraps after '~' so a
# large npadcols never produces control or non-ASCII characters, and never
# exceeds what chr() accepts on Python 2.
_npadchars = ord('~') - ord('0') + 1
paddecl   = ', '.join(( padname % (n,) + ' text' for n in range(npadcols) ))
padtarget = ', '.join(( padname % (n,) for n in range(npadcols) ))
padvalues = ', '.join(( "'" + chr(ord('0') + n % _npadchars)*padlen + "'"
                        for n in range(npadcols) ))

def pglog(msg):
    """Return a SQL statement that shows msg as a one-row result titled "Notice".

    msg is formatted with %s, so any object is accepted.  Single quotes in
    the formatted text are doubled so that the text cannot terminate the
    SQL string literal early.
    """
    text = ('%s' % (msg,)).replace("'", "''")
    return """select '%s' as "Notice";""" % (text,)

def setup():
    """Return the SQL that recreates the avac schema and its t_list table.

    The text starts with a blank line and ends with a newline.
    """
    lines = (
        '',
        'drop schema if exists avac cascade;',
        'create schema avac;',
        'create table if not exists avac.t_list (',
        '    tabno int primary key,',
        '    relname text,',
        '    nrows int',
        ');',
        '',
    )
    return '\n'.join(lines)

def make_table(tabno, nrows):
    """Return the SQL that creates and fills one test table.

    The generated SQL registers the table in avac.t_list, creates it with
    the fixed columns plus the padding columns, inserts nrows rows via
    generate_series, and finally adds a primary key on rno.

    tabno -- integer table number; also used to build the table name
    nrows -- number of rows to insert

    Reads the module-level templates tabname, paddecl, padtarget and
    padvalues.
    """
    # With zero padding columns the pad fragments are empty strings.  Only
    # emit the separating commas when there is something to separate,
    # otherwise the column lists would end in a dangling comma.
    if paddecl:
        pad_decl_sql = ',\n    ' + paddecl
        pad_target_sql = ', ' + padtarget
        pad_values_sql = ', ' + padvalues
    else:
        pad_decl_sql = pad_target_sql = pad_values_sql = ''
    return """
-- %(table)s
insert into avac.t_list(tabno, relname, nrows)
            values(%(tabno)d, '%(table)s', %(nrows)d);
create table avac.%(table)s (
    rno int,
    tabno int default %(tabno)d,
    revision int default 0,
    ctime timestamptz default now(),
    mtime timestamptz default now()%(paddecl)s
);
insert into avac.%(table)s (rno%(padtarget)s)
  select rno%(padvalues)s
    from generate_series(1, %(nrows)d) rowgen(rno);
alter table avac.%(table)s add constraint
    %(table)s_pkey primary key(rno);
-- create unique index %(table)s_rev_idx on
--     avac.%(table)s (revision, rno);
""" % dict(tabno = tabno, nrows = nrows, table = tabname % (tabno,),
           paddecl = pad_decl_sql, padtarget = pad_target_sql,
           padvalues = pad_values_sql)

def scale(n, factor=1):
    """Return factor raised to the number of trailing decimal zeros in n.

    At most three trailing zeros are counted, so the result is one of
    factor**0 through factor**3.  Zero itself counts as having no trailing
    zeros.  The sign of n is ignored, so negative values no longer raise
    ValueError.
    """
    exponent = 0
    remaining = abs(n)
    # Strip one trailing zero per iteration, stopping at the cap of three.
    while exponent < 3 and remaining and remaining % 10 == 0:
        remaining //= 10
        exponent += 1
    return factor ** exponent

# Emit the whole script on stdout: schema bootstrap first, then one
# make_table() chunk per table, grouped into transactions of batchsize
# tables.  A parenthesised single-argument print behaves exactly like the
# print statement on Python 2.
print(pglog('DATAGEN setup start.'))
print(setup())
print(pglog('DATAGEN setting up: %d tables, %d rows with %d cols of %d chars padding.' % (ntabs, nrows, npadcols, padlen)))
for tno in range(ntabs):
    if tno == 0:
        # The very first table opens the first transaction.
        print('begin;')
    elif tno % batchsize == 0:
        # Batch boundary: close the running transaction, open the next.
        print('commit;')
        print('begin;')
    rowcount = nrows * scale(tno, scalearg)
    print(make_table(tno, rowcount))
if ntabs > 0:
    # At least one table was emitted, so a transaction is still open.
    print('commit;')
print(pglog('DATAGEN setup done.'))

