#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <pthread.h>
#include <unistd.h>

#define	FIELD_MAX		1024
#define	RECORD_MAX		2048
#define FIELD_CHAR01	"-"
#define FIELD_CHAR02	"*"

#define	DELIM_DEF		","
#define	NUM_RECORDS_DEF	100
#define	NUM_COLUMNS_DEF	5
#define	FIELD_SIZE_DEF	10
#define	INPUT_FILE_DEF	"data.csv"
#define	PARTITIONS_DEF	1
#define	NUM_THREAD		8
#define	FILE_NAME_LEN	32

/* Parameters describing one file-writing job.
 *
 * main() fills one instance from the command line and stdin; in partition
 * mode CreateInputDataAsPartition() builds one separate instance per file.
 */
typedef struct
{
	char	 base_record[8192];			/* line read from stdin; written once per record */
	char	*delim;						/* delimiter string (optarg or DELIM_DEF, never freed); only printed by main() */
	int		 begin;						/* first record number of this job's range */
	int		 end;						/* last record number of this job's range (inclusive) */
	int		 records;					/* total number of records requested (-r) */
	int		 columns;					/* number of attributes (-c); used by estimate_record_size() */
	int		 field_size;				/* size of one field (-f); used by estimate_record_size() */
	char	 input_file[FILE_NAME_LEN];	/* name of the file this job creates (opened for writing) */
	int		 partitions;				/* number of files to split the records into (-p) */
} InputData;

/* Forward declarations for the file-local functions defined below. */
static void usage(void);
static int estimate_record_size(InputData *input);
static void *create_input_file(void *data);
static void CreateInputDataAsPartition(InputData *input);
static void CreateInputDataAsSingle(InputData *input);

/* Print the command-line help text to standard output.
 *
 * Only -r and -p are listed. NOTE: main() also accepts -d, -c and -f, but
 * they are not advertised here.
 */
static void
usage(void)
{
	static const char *const help_lines[] = {
		"dupdata is a utility to duplicate an input record.\n",
		"\n",
		"Common options:\n",
		"-r\tnumber of records.\n",
		"-p\tnumber of partitions.\n",
	};
	size_t		idx;

	for (idx = 0; idx < sizeof(help_lines) / sizeof(help_lines[0]); idx++)
		fputs(help_lines[idx], stdout);
}

/* Estimate the size of one generated record.
 *
 * The estimate is field_size * columns + sizeof(int) + columns
 * (presumably the fields, a record number and one delimiter per column --
 * the code does not say).
 *
 * The arithmetic is done in long long so that large -c / -f values cannot
 * overflow int; a result outside the range of int is saturated to
 * INT_MAX / INT_MIN, so an oversized request still compares greater than
 * any sane limit in the caller.
 */
static int
estimate_record_size(InputData *input)
{
	long long	total;

	total = (long long) input->field_size * input->columns;
	total += (long long) sizeof(int);
	total += input->columns;

	if (total > INT_MAX)
		return INT_MAX;
	if (total < INT_MIN)
		return INT_MIN;

	return (int) total;
}

/* Create the input file.
 *
 * This function is executed in sub-thread.
 */
static void *
create_input_file(void *data)
{
	InputData	*input = data;
	FILE		*product;
	int			 line_no = 0;
	int			 i;

	/* Show input file and record ranges. */
	printf("%s : %d - %d\n", input->input_file, input->begin, input->end);

	product = fopen(input->input_file, "w");
	if (product == NULL)
	{
		fprintf(stderr, "ERROR: failed to open for creating an output:\"%s\"\n",
						input->input_file);
		exit(1);
	}

	for (i = input->begin; i <= input->end; ++i)
	{
		line_no++;
		fprintf(product, "%d,%s", line_no, input->base_record);
	}

	fclose(product);

	return NULL;
}

/* Write every record into one file named INPUT_FILE_DEF.
 *
 * The job covers record numbers 1 .. input->records and runs synchronously
 * in the calling thread; input itself is updated to describe that job.
 */
static void
CreateInputDataAsSingle(InputData *input)
{
	/* The whole range belongs to the one and only file. */
	input->begin = 1;
	input->end = input->records;
	input->partitions = PARTITIONS_DEF;

	/* sizeof of the literal includes its terminating NUL. */
	memcpy(input->input_file, INPUT_FILE_DEF, sizeof(INPUT_FILE_DEF));

	/* The start-routine return value carries no information. */
	(void) create_input_file(input);
}

/* Split the records over input->partitions files written in parallel.
 *
 * File k (1-based) is named INPUT_FILE_DEF "_k" and is written by its own
 * thread running create_input_file(). Every file receives
 * records / partitions records; the last file additionally takes the
 * remainder, so the files together hold exactly input->records records.
 *
 * The partition count must be between 1 and NUM_THREAD because the
 * per-thread arrays have that capacity. An invalid count, an allocation
 * failure or a thread-creation failure is reported on stderr and ends the
 * process with exit(1).
 */
static void
CreateInputDataAsPartition(InputData *input)
{
	pthread_t	 pthread[NUM_THREAD];
	InputData	*input_t[NUM_THREAD];
	int			 range;
	int			 begin;
	int			 end = 0;
	int			 rc;
	int			 i;

	/* Guards both the fixed-size arrays and the division below. */
	if (input->partitions < 1 || input->partitions > NUM_THREAD)
	{
		fprintf(stderr, "ERROR: number of partitions must be between 1 and %d\n",
				NUM_THREAD);
		exit(1);
	}

	range = input->records / input->partitions;

	for (i = 0; i < input->partitions; ++i)
	{
		InputData  *part = calloc(1, sizeof *part);

		if (part == NULL)
		{
			fprintf(stderr, "ERROR: out of memory\n");
			exit(1);
		}

		/* Each thread gets a private copy of the job description. */
		snprintf(part->base_record, sizeof(part->base_record), "%s",
				 input->base_record);
		part->delim = input->delim;
		part->records = input->records;
		part->columns = input->columns;
		part->field_size = input->field_size;

		/*
		 * Inclusive range of exactly "range" records; the last partition
		 * runs up to the final record so that the remainder is not lost.
		 */
		begin = end + 1;
		if (i == input->partitions - 1)
			end = input->records;
		else
			end = begin + range - 1;

		snprintf(part->input_file, sizeof(part->input_file), "%s_%d",
				 INPUT_FILE_DEF, i + 1);
		part->begin = begin;
		part->end = end;

		/* pthread_create() returns 0 on success, an error number otherwise. */
		rc = pthread_create(&pthread[i], NULL, create_input_file, part);
		if (rc != 0)
		{
			fprintf(stderr, "return code from pthread_create() is %d\n", rc);
			free(part);
			exit(1);
		}

		input_t[i] = part;
	}

	/* Wait for every writer before releasing the data it reads. */
	for (i = 0; i < input->partitions; ++i)
		(void) pthread_join(pthread[i], NULL);

	for (i = 0; i < input->partitions; ++i)
		free(input_t[i]);
}

/* Parse a non-negative decimal integer option argument.
 *
 * Returns 0 and stores the result in *value on success. Returns -1, leaving
 * *value untouched, when text is empty, contains trailing characters, is
 * negative or does not fit in an int.
 */
static int
parse_nonneg_int(const char *text, int *value)
{
	char	*end;
	long	 parsed;

	errno = 0;
	parsed = strtol(text, &end, 10);
	if (end == text || *end != '\0' || errno == ERANGE ||
		parsed < 0 || parsed > INT_MAX)
		return -1;

	*value = (int) parsed;
	return 0;
}

/* Program entry point.
 *
 * Parses the options (-d, -r, -c, -f, -p), applies defaults to anything left
 * at zero, validates the field and record sizes, prints a summary, reads the
 * base record (one line) from standard input and then writes the output
 * file(s): a single file when partitions <= 1, otherwise one file per
 * partition.
 *
 * Returns 0 on success and 1 on any usage, validation, memory or input error.
 */
int
main(int argc, char *argv[])
{
	InputData	*input;
	int			 opt;
	int			 rec_size;
	int			 status = 1;

	if (argc > 1 && (strcmp(argv[1], "--help") == 0 ||
					 strcmp(argv[1], "-?") == 0))
	{
		usage();
		return 0;
	}

	/* Zero-filled so that "option not given" reads as 0 / NULL below. */
	input = calloc(1, sizeof *input);
	if (input == NULL)
	{
		fprintf(stderr, "ERROR: out of memory\n");
		return 1;
	}

	/* Analysis options */
	while ((opt = getopt(argc, argv, "d:r:c:f:p:")) != -1)
	{
		int		*number = NULL;		/* destination of a numeric option */

		switch (opt)
		{
			case 'd':
				input->delim = optarg;
				break;
			case 'r':
				number = &input->records;
				break;
			case 'c':
				number = &input->columns;
				break;
			case 'f':
				number = &input->field_size;
				break;
			case 'p':
				number = &input->partitions;
				break;
			default:
				fprintf(stderr, "Invalid options is specified.\n");
				fprintf(stderr, "Try \"%s --help\" for more information.\n",
						argv[0]);
				goto done;
		}

		/* Reject garbage and negative numbers instead of guessing. */
		if (number != NULL && parse_nonneg_int(optarg, number) != 0)
		{
			fprintf(stderr, "Invalid value \"%s\" for option -%c.\n",
					optarg, opt);
			fprintf(stderr, "Try \"%s --help\" for more information.\n",
					argv[0]);
			goto done;
		}
	}

	/* A value of zero (or no option at all) selects the default. */
	if (input->delim == NULL)
		input->delim = DELIM_DEF;

	if (input->records == 0)
		input->records = NUM_RECORDS_DEF;

	if (input->columns == 0)
		input->columns = NUM_COLUMNS_DEF;

	if (input->field_size == 0)
		input->field_size = FIELD_SIZE_DEF;

	if (input->field_size >= FIELD_MAX)
	{
		fprintf(stderr, "You must specify field size less than %d.\n", FIELD_MAX);
		goto done;
	}

	rec_size = estimate_record_size(input);
	if (rec_size > RECORD_MAX)
	{
		fprintf(stderr, "You must specify record size less than %d.%d size record was specified.\n",
				RECORD_MAX, rec_size);
		fprintf(stderr, "Please try to decrease values specified in \"-f\" or \"-c\" option.\n");
		goto done;
	}

	printf("Input data information.\n");
	printf("-----------------------\n");
	printf("delimiter  : %s\n", input->delim);
	printf("records    : %d\n", input->records);
	printf("columns    : %d\n", input->columns);
	printf("field size : %d\n", input->field_size);
	printf("partitions : %d\n", input->partitions);
	printf("record size: %d\n", rec_size);

	/* The record to duplicate is the first line of standard input. */
	if (fgets(input->base_record, sizeof(input->base_record), stdin) == NULL)
	{
		fprintf(stderr, "ERROR: failed to read the base record from standard input\n");
		goto done;
	}

	/* Create INPUT file. */
	if (input->partitions <= 1)	/* single mode */
		CreateInputDataAsSingle(input);
	else	/* partition mode */
		CreateInputDataAsPartition(input);

	status = 0;

done:
	free(input);
	return status;
}

