#!/usr/bin/python

import psycopg2
import random
import sys

from datetime import datetime, timedelta, tzinfo
from uuid import uuid4


# Number of fax records main() inserts per run.
CREATE_RECORD_COUNT = 10000
# Words of generated text per page; main() multiplies this by the record's
# page count to size the "ocr_text" column.
WORDS_PER_PAGE = 250
# DDL executed by main() when the probe query against fax_rxfax fails.
# Creates the table and a GIN index over the "search_index" tsvector column.
CREATE_TABLE_SQL = """
CREATE TABLE "fax_rxfax" (
    "id" uuid NOT NULL PRIMARY KEY,
    "search_index" tsvector,
    "file_path" varchar(255),
    "time_stamp" timestamp with time zone NOT NULL,
    "remote_id" varchar(60),
    "caller_id" varchar(40),
    "num_pages" integer,
    "ocr_text" text
);
CREATE INDEX fax_rxfax_search_index ON fax_rxfax USING GIN(search_index);
"""


class UTC(tzinfo):
    """
    Fixed tzinfo describing UTC: zero offset and no daylight saving time.

    Follows the UTC example given in Python's datetime documentation.
    """

    # Shared zero offset; timedelta objects are immutable, so reuse is safe.
    _ZERO = timedelta(0)

    def utcoffset(self, dt):
        """Return the offset from UTC, which is always zero."""
        return self._ZERO

    def dst(self, dt):
        """Return the daylight saving adjustment, which is always zero."""
        return self._ZERO

    def tzname(self, dt):
        """Return the time zone name."""
        return "UTC"

    def __repr__(self):
        return "<UTC>"


# Module-wide singleton used to build timezone-aware datetimes.
utc = UTC()


def now():
    """
    Return the current UTC time as a timezone-aware datetime.
    """
    # utcnow() yields a naive value, so attach the UTC tzinfo explicitly.
    naive_utc = datetime.utcnow()
    return naive_utc.replace(tzinfo=utc)


def gen_words(count, words_path='words.txt'):
    """
    Build pseudo-random text of ``count`` words drawn from a dictionary file.

    Words are grouped into sentences of 6 to 15 words.  The first word of
    each sentence is capitalized and each sentence ends with a punctuation
    mark picked at random ('.' is twice as likely as '?' or '!').  Sentences
    are separated by two spaces: the punctuation's trailing space plus the
    regular word separator.

    :param count: number of words to generate.
    :param words_path: path of the dictionary file, one word per line.
        Blank lines are ignored.
    :return: the generated text with surrounding whitespace stripped, always
        ending in exactly one punctuation mark; an empty string when
        ``count`` is zero or negative.
    :raises IOError: if the dictionary file cannot be opened.
    :raises IndexError: from ``random.choice`` if the dictionary has no words
        and ``count`` is positive.
    """
    punctuation_list = ['. ', '. ', '? ', '! ']

    # The context manager guarantees the file handle is released.
    with open(words_path, 'r') as fp:
        stripped_lines = (line.strip() for line in fp)
        dictionary_list = [word for word in stripped_lines if word]

    if count <= 0:
        return ""

    word_list = []
    sentence_len = random.randint(6, 15)
    capitalize = True
    sentence_closed = False
    for i in range(count):
        if i > 0:
            word_list.append(" ")

        word = random.choice(dictionary_list)
        if capitalize:
            word = word.capitalize()
            capitalize = False
        word_list.append(word)

        sentence_len -= 1
        sentence_closed = sentence_len == 0
        if sentence_closed:
            sentence_len = random.randint(6, 15)
            word_list.append(random.choice(punctuation_list))
            capitalize = True

    # Only terminate the text when the last word did not already end a
    # sentence; otherwise the output would end with two punctuation marks.
    if not sentence_closed:
        word_list.append(random.choice(punctuation_list))

    return "".join(word_list).strip()

def gen_phone_number():
    """
    Generate a random 11-digit telephone number as a string.

    The layout is ``1 NXX NXX XXXX``: a leading "1", then a three-digit
    block and a second three-digit block whose first digits are each in
    2-9, followed by four unrestricted digits.

    :return: a string of 11 decimal digits.
    """
    tn_list = ["1"]
    # Positions 1 and 4 open the two three-digit blocks and must be 2-9;
    # every other position may be any digit.
    for position in range(1, 11):
        lowest = 2 if position in (1, 4) else 0
        tn_list.append(str(random.randint(lowest, 9)))
    return "".join(tn_list)

def main():
    """
    Populate the fax_rxfax table with CREATE_RECORD_COUNT random records.

    Connects to the local test database, creates the table (and its GIN
    index) if a probe query shows it is missing, then inserts the records
    one at a time.  Each record gets a random id, caller id, page count and
    generated text, and its "search_index" tsvector is built from the text,
    caller id and remote id.  The id of every created record is written to
    stdout.

    NOTE(review): the connection string embeds test credentials; confirm
    this script is never pointed at a non-test database.
    """
    conn = psycopg2.connect("dbname=testdb user=testdbuser host=127.0.0.1 password=testdbuser port=5432")
    try:
        cur = conn.cursor()

        # Probe for the table; a ProgrammingError is taken to mean it does
        # not exist yet, so the failed transaction is rolled back and the
        # schema is created.
        try:
            cur.execute("SELECT count(*) FROM fax_rxfax;")
            conn.commit()
        except psycopg2.ProgrammingError:
            conn.rollback()
            cur.execute(CREATE_TABLE_SQL)
            conn.commit()

        # range() works on both Python 2 and 3 (xrange is Python 2 only).
        for _ in range(CREATE_RECORD_COUNT):
            record_id = str(uuid4())
            time_stamp = now()
            file_path = "/var/tmp/%s.pdf" % (record_id)
            caller_id = gen_phone_number()
            remote_id = "Fax Machine %s" % (caller_id)
            num_pages = random.randint(10, 30)
            ocr_text = gen_words(num_pages * WORDS_PER_PAGE)
            cur.execute("INSERT INTO fax_rxfax (id, file_path, time_stamp, remote_id, caller_id, num_pages, ocr_text) VALUES (%s, %s, %s, %s, %s, %s, %s)",
                        (record_id, file_path, time_stamp, remote_id, caller_id, num_pages, ocr_text))
            cur.execute("""UPDATE fax_rxfax SET search_index = setweight( to_tsvector( 'pg_catalog.english', coalesce("ocr_text",'') ), 'A' ) || setweight( to_tsvector( 'pg_catalog.english', coalesce("caller_id",'') ), 'A' ) || setweight( to_tsvector( 'pg_catalog.english', coalesce("remote_id",'') ), 'A' ) WHERE id = (%s);""",
                        (record_id,))
            # Commit the insert and its search index together so a record is
            # never stored without its index.
            conn.commit()
            sys.stdout.write("Created record id: %s\n" % record_id)
    finally:
        # Always release the connection, even if a statement fails; closing
        # it also makes the cursor unusable.
        conn.close()


if __name__ == '__main__':
    main()
