#!/usr/bin/env python3

# python json-generate.py NUM_OF_DOCUMENTS NEST_LEVELS UNIQUE_KEYS KEYS_PER_LEVEL KEY_LENGTH UNIQUE_VALUES
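
# for example (illustrative values only):
#
#   python json-generate.py 100000 3 64 4 8 1000
#
# generates 100000 documents, 3 levels deep, with 4 keys per level drawn
# from a pool of 64 unique 8-character keys, and integer values drawn
# from a pool of 1000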

import psycopg2
import hashlib
import random
import sys

from psycopg2.extras import Json

# number of documents to generate
ndocuments = int(sys.argv[1])

# number of levels of nesting
nlevels = int(sys.argv[2])

# number of distinct keys (the shared pool that all levels draw from)
nkeys_unique = int(sys.argv[3])

# number of keys picked at each level (so a document has up to
# nkeys_per_level ** nlevels values at the deepest level)
nkeys_per_level = int(sys.argv[4])

# length of each generated key string
keys_length = int(sys.argv[5])

# number of unique values (shared by all keys)
nvalues_unique = int(sys.argv[6])


# pre-generate the pool of unique keys (md5 prefixes: deterministic, but
# random-looking; note that keys_length is effectively capped at 32, the
# length of an md5 hexdigest)
keys = [hashlib.md5(str(i).encode()).hexdigest()[0:keys_length] for i in range(0, nkeys_unique)]

# pick keys randomly from the pre-generated pool (with replacement, so a
# level may pick the same key twice and end up with fewer distinct entries)
def generate_keys(nkeys):
	return random.choices(keys, k=nkeys)

# generate a random value (simply an integer with nvalues_unique
# possible values)
def generate_value():
	return random.randint(0, nvalues_unique - 1)

# generate a document with nlevels of nesting, recursively
def generate_document(nkeys, nlevels):

	# this is the deepest level, so generate a random value
	if nlevels == 0:
		return generate_value()

	# not the deepest level, so generate a dictionary with a sub-document
	# (or, at the deepest level, a value) for each randomly picked key
	doc = {}

	for k in generate_keys(nkeys):
		doc[k] = generate_document(nkeys, nlevels - 1)

	return doc
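
# for illustration, with nkeys_per_level = 2 and nlevels = 2 a generated
# document might look like this (the keys below are made-up hex strings,
# not real md5 prefixes):
#
#   {"a1b2c3d4": {"e5f60718": 42, "93acd2e1": 7},
#    "0fbe4c21": {"e5f60718": 13}}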


if __name__ == '__main__':
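
	# the script assumes the target table already exists, created with
	# something like this (jsonb is an assumption; plain json works too):
	#
	#   create table test_table (v jsonb);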

	conn = psycopg2.connect('host=localhost dbname=json_test')
	cur = conn.cursor()

	for i in range(0, ndocuments):

		# commit in batches of 1000 documents; psycopg2 starts a new
		# transaction implicitly on the next execute
		if i > 0 and i % 1000 == 0:
			conn.commit()

		cur.execute('insert into test_table (v) values (%(json)s)',
			{'json': Json(generate_document(nkeys_per_level, nlevels))})

	# commit the final (possibly partial) batch; this also covers the
	# cases of ndocuments == 0 and of the last batch being exactly full
	conn.commit()

	cur.close()
	conn.close()